summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/fddi.c7
-rw-r--r--net/802/hippi.c12
-rw-r--r--net/802/psnap.c4
-rw-r--r--net/802/tr.c9
-rw-r--r--net/8021q/vlan.c9
-rw-r--r--net/8021q/vlan_dev.c15
-rw-r--r--net/8021q/vlanproc.c36
-rw-r--r--net/Kconfig22
-rw-r--r--net/Makefile4
-rw-r--r--net/appletalk/aarp.c14
-rw-r--r--net/appletalk/ddp.c29
-rw-r--r--net/atm/br2684.c8
-rw-r--r--net/atm/clip.c13
-rw-r--r--net/atm/ioctl.c3
-rw-r--r--net/atm/lec.c15
-rw-r--r--net/atm/mpc.c17
-rw-r--r--net/atm/signaling.c2
-rw-r--r--net/ax25/Kconfig61
-rw-r--r--net/ax25/af_ax25.c114
-rw-r--r--net/ax25/ax25_ds_subr.c2
-rw-r--r--net/ax25/ax25_in.c24
-rw-r--r--net/ax25/ax25_ip.c4
-rw-r--r--net/ax25/ax25_out.c12
-rw-r--r--net/ax25/ax25_subr.c4
-rw-r--r--net/bluetooth/af_bluetooth.c2
-rw-r--r--net/bluetooth/bnep/core.c17
-rw-r--r--net/bluetooth/cmtp/core.c4
-rw-r--r--net/bluetooth/hci_conn.c36
-rw-r--r--net/bluetooth/hci_core.c35
-rw-r--r--net/bluetooth/hci_event.c8
-rw-r--r--net/bluetooth/hci_sock.c11
-rw-r--r--net/bluetooth/hci_sysfs.c9
-rw-r--r--net/bluetooth/hidp/core.c25
-rw-r--r--net/bluetooth/l2cap.c82
-rw-r--r--net/bluetooth/rfcomm/core.c35
-rw-r--r--net/bluetooth/rfcomm/tty.c11
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/bridge/br.c12
-rw-r--r--net/bridge/br_device.c22
-rw-r--r--net/bridge/br_fdb.c46
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c10
-rw-r--r--net/bridge/br_input.c51
-rw-r--r--net/bridge/br_ioctl.c9
-rw-r--r--net/bridge/br_netfilter.c184
-rw-r--r--net/bridge/br_netlink.c27
-rw-r--r--net/bridge/br_notify.c13
-rw-r--r--net/bridge/br_private.h23
-rw-r--r--net/bridge/br_stp.c11
-rw-r--r--net/bridge/br_stp_bpdu.c19
-rw-r--r--net/bridge/br_stp_if.c69
-rw-r--r--net/bridge/br_stp_timer.c1
-rw-r--r--net/bridge/br_sysfs_br.c20
-rw-r--r--net/bridge/br_sysfs_if.c8
-rw-r--r--net/bridge/netfilter/ebt_arp.c48
-rw-r--r--net/bridge/netfilter/ebt_log.c12
-rw-r--r--net/bridge/netfilter/ebt_ulog.c12
-rw-r--r--net/compat.c79
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c10
-rw-r--r--net/core/dev.c475
-rw-r--r--net/core/dev_mcast.c7
-rw-r--r--net/core/ethtool.c4
-rw-r--r--net/core/fib_rules.c191
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/gen_stats.c4
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c53
-rw-r--r--net/core/net-sysfs.c31
-rw-r--r--net/core/netpoll.c31
-rw-r--r--net/core/pktgen.c303
-rw-r--r--net/core/rtnetlink.c304
-rw-r--r--net/core/skbuff.c431
-rw-r--r--net/core/sock.c777
-rw-r--r--net/core/sysctl_net_core.c8
-rw-r--r--net/core/utils.c6
-rw-r--r--net/dccp/ackvec.c2
-rw-r--r--net/dccp/ccids/ccid3.c322
-rw-r--r--net/dccp/ccids/ccid3.h10
-rw-r--r--net/dccp/ccids/lib/loss_interval.c2
-rw-r--r--net/dccp/dccp.h75
-rw-r--r--net/dccp/input.c54
-rw-r--r--net/dccp/ipv4.c43
-rw-r--r--net/dccp/ipv6.c40
-rw-r--r--net/dccp/minisocks.c2
-rw-r--r--net/dccp/options.c18
-rw-r--r--net/dccp/output.c19
-rw-r--r--net/dccp/probe.c17
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/dccp/timer.c25
-rw-r--r--net/decnet/af_decnet.c12
-rw-r--r--net/decnet/dn_dev.c116
-rw-r--r--net/decnet/dn_fib.c15
-rw-r--r--net/decnet/dn_neigh.c6
-rw-r--r--net/decnet/dn_nsp_in.c7
-rw-r--r--net/decnet/dn_nsp_out.c8
-rw-r--r--net/decnet/dn_route.c42
-rw-r--r--net/decnet/dn_rules.c19
-rw-r--r--net/decnet/dn_table.c11
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c8
-rw-r--r--net/econet/af_econet.c15
-rw-r--r--net/ethernet/eth.c5
-rw-r--r--net/ieee80211/Kconfig9
-rw-r--r--net/ieee80211/ieee80211_crypt.c2
-rw-r--r--net/ieee80211/ieee80211_crypt_ccmp.c4
-rw-r--r--net/ieee80211/ieee80211_crypt_tkip.c6
-rw-r--r--net/ieee80211/ieee80211_crypt_wep.c4
-rw-r--r--net/ieee80211/ieee80211_module.c5
-rw-r--r--net/ieee80211/ieee80211_rx.c25
-rw-r--r--net/ieee80211/ieee80211_tx.c12
-rw-r--r--net/ieee80211/ieee80211_wx.c4
-rw-r--r--net/ipv4/Kconfig27
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c149
-rw-r--r--net/ipv4/ah4.c14
-rw-r--r--net/ipv4/arp.c16
-rw-r--r--net/ipv4/cipso_ipv4.c50
-rw-r--r--net/ipv4/devinet.c58
-rw-r--r--net/ipv4/esp4.c59
-rw-r--r--net/ipv4/fib_frontend.c34
-rw-r--r--net/ipv4/fib_hash.c2
-rw-r--r--net/ipv4/fib_rules.c25
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c62
-rw-r--r--net/ipv4/icmp.c31
-rw-r--r--net/ipv4/igmp.c60
-rw-r--r--net/ipv4/inet_diag.c90
-rw-r--r--net/ipv4/inetpeer.c38
-rw-r--r--net/ipv4/ip_forward.c14
-rw-r--r--net/ipv4/ip_fragment.c47
-rw-r--r--net/ipv4/ip_gre.c63
-rw-r--r--net/ipv4/ip_input.c38
-rw-r--r--net/ipv4/ip_options.c26
-rw-r--r--net/ipv4/ip_output.c129
-rw-r--r--net/ipv4/ip_sockglue.c1169
-rw-r--r--net/ipv4/ipcomp.c58
-rw-r--r--net/ipv4/ipconfig.c21
-rw-r--r--net/ipv4/ipip.c60
-rw-r--r--net/ipv4/ipmr.c418
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c14
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c56
-rw-r--r--net/ipv4/ipvs/ip_vs_dh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_ftp.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah.c16
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c24
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c26
-rw-r--r--net/ipv4/ipvs/ip_vs_sh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c44
-rw-r--r--net/ipv4/multipath_drr.c2
-rw-r--r--net/ipv4/netfilter.c8
-rw-r--r--net/ipv4/netfilter/Kconfig267
-rw-r--r--net/ipv4/netfilter/Makefile45
-rw-r--r--net/ipv4/netfilter/arp_tables.c14
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c12
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c229
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c1550
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c520
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_h323.c1841
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c684
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c314
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c143
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c1577
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c74
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c328
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c315
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c659
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c1164
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c148
-rw-r--r--net/ipv4/netfilter/ip_conntrack_sip.c520
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c962
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c161
-rw-r--r--net/ipv4/netfilter/ip_nat_amanda.c85
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c634
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c180
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c436
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_h323.c611
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c350
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c122
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c174
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c87
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c154
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c144
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c55
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c314
-rw-r--r--net/ipv4/netfilter/ip_nat_sip.c282
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c1333
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c385
-rw-r--r--net/ipv4/netfilter/ip_nat_tftp.c70
-rw-r--r--net/ipv4/netfilter/ip_queue.c28
-rw-r--r--net/ipv4/netfilter/ip_tables.c12
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c26
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c15
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c16
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c57
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c26
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c24
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c45
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c40
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c4
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c80
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c2
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c10
-rw-r--r--net/ipv4/netfilter/ipt_iprange.c2
-rw-r--r--net/ipv4/netfilter/ipt_recent.c6
-rw-r--r--net/ipv4/netfilter/ipt_tos.c2
-rw-r--r--net/ipv4/netfilter/ipt_ttl.c11
-rw-r--r--net/ipv4/netfilter/iptable_filter.c3
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c30
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c27
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c11
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c76
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c20
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c37
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c32
-rw-r--r--net/ipv4/proc.c41
-rw-r--r--net/ipv4/protocol.c2
-rw-r--r--net/ipv4/raw.c18
-rw-r--r--net/ipv4/route.c29
-rw-r--r--net/ipv4/syncookies.c40
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv4/tcp.c162
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cong.c68
-rw-r--r--net/ipv4/tcp_cubic.c81
-rw-r--r--net/ipv4/tcp_highspeed.c24
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_hybla.c2
-rw-r--r--net/ipv4/tcp_illinois.c356
-rw-r--r--net/ipv4/tcp_input.c646
-rw-r--r--net/ipv4/tcp_ipv4.c143
-rw-r--r--net/ipv4/tcp_lp.c8
-rw-r--r--net/ipv4/tcp_minisocks.c29
-rw-r--r--net/ipv4/tcp_output.c213
-rw-r--r--net/ipv4/tcp_probe.c68
-rw-r--r--net/ipv4/tcp_timer.c10
-rw-r--r--net/ipv4/tcp_vegas.c57
-rw-r--r--net/ipv4/tcp_vegas.h24
-rw-r--r--net/ipv4/tcp_veno.c10
-rw-r--r--net/ipv4/tcp_westwood.c21
-rw-r--r--net/ipv4/tcp_yeah.c268
-rw-r--r--net/ipv4/udp.c438
-rw-r--r--net/ipv4/udplite.c2
-rw-r--r--net/ipv4/xfrm4_input.c23
-rw-r--r--net/ipv4/xfrm4_mode_beet.c53
-rw-r--r--net/ipv4/xfrm4_mode_transport.c28
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c31
-rw-r--r--net/ipv4/xfrm4_output.c3
-rw-r--r--net/ipv4/xfrm4_policy.c8
-rw-r--r--net/ipv4/xfrm4_tunnel.c3
-rw-r--r--net/ipv6/Kconfig10
-rw-r--r--net/ipv6/Makefile5
-rw-r--r--net/ipv6/addrconf.c294
-rw-r--r--net/ipv6/af_inet6.c89
-rw-r--r--net/ipv6/ah6.c34
-rw-r--r--net/ipv6/anycast.c17
-rw-r--r--net/ipv6/datagram.c63
-rw-r--r--net/ipv6/esp6.c52
-rw-r--r--net/ipv6/exthdrs.c156
-rw-r--r--net/ipv6/fib6_rules.c53
-rw-r--r--net/ipv6/icmp.c48
-rw-r--r--net/ipv6/ip6_fib.c12
-rw-r--r--net/ipv6/ip6_input.c26
-rw-r--r--net/ipv6/ip6_output.c187
-rw-r--r--net/ipv6/ip6_tunnel.c643
-rw-r--r--net/ipv6/ipcomp6.c16
-rw-r--r--net/ipv6/ipv6_sockglue.c50
-rw-r--r--net/ipv6/ipv6_syms.c36
-rw-r--r--net/ipv6/mcast.c61
-rw-r--r--net/ipv6/mip6.c62
-rw-r--r--net/ipv6/ndisc.c435
-rw-r--r--net/ipv6/netfilter.c8
-rw-r--r--net/ipv6/netfilter/Kconfig2
-rw-r--r--net/ipv6/netfilter/ip6_queue.c28
-rw-r--r--net/ipv6/netfilter/ip6_tables.c17
-rw-r--r--net/ipv6/netfilter/ip6t_HL.c2
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c21
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c11
-rw-r--r--net/ipv6/netfilter/ip6t_eui64.c8
-rw-r--r--net/ipv6/netfilter/ip6t_hl.c2
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c2
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c18
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c30
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c7
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c59
-rw-r--r--net/ipv6/proc.c62
-rw-r--r--net/ipv6/protocol.c4
-rw-r--r--net/ipv6/raw.c56
-rw-r--r--net/ipv6/reassembly.c62
-rw-r--r--net/ipv6/route.c171
-rw-r--r--net/ipv6/sit.c58
-rw-r--r--net/ipv6/tcp_ipv6.c121
-rw-r--r--net/ipv6/udp.c125
-rw-r--r--net/ipv6/udplite.c2
-rw-r--r--net/ipv6/xfrm6_input.c18
-rw-r--r--net/ipv6/xfrm6_mode_beet.c27
-rw-r--r--net/ipv6/xfrm6_mode_ro.c7
-rw-r--r--net/ipv6/xfrm6_mode_transport.c20
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c36
-rw-r--r--net/ipv6/xfrm6_output.c6
-rw-r--r--net/ipv6/xfrm6_policy.c25
-rw-r--r--net/ipv6/xfrm6_tunnel.c8
-rw-r--r--net/ipx/af_ipx.c9
-rw-r--r--net/ipx/ipx_route.c4
-rw-r--r--net/irda/af_irda.c140
-rw-r--r--net/irda/ircomm/ircomm_param.c4
-rw-r--r--net/irda/irda_device.c21
-rw-r--r--net/irda/irlan/irlan_common.c2
-rw-r--r--net/irda/irlan/irlan_eth.c3
-rw-r--r--net/irda/irlap_event.c2
-rw-r--r--net/irda/irlap_frame.c18
-rw-r--r--net/irda/irnet/irnet.h2
-rw-r--r--net/irda/irnet/irnet_irda.c34
-rw-r--r--net/irda/irqueue.c9
-rw-r--r--net/irda/irttp.c11
-rw-r--r--net/irda/parameters.c8
-rw-r--r--net/irda/qos.c14
-rw-r--r--net/irda/wrapper.c5
-rw-r--r--net/iucv/af_iucv.c195
-rw-r--r--net/iucv/iucv.c256
-rw-r--r--net/key/af_key.c94
-rw-r--r--net/llc/llc_core.c10
-rw-r--r--net/llc/llc_input.c2
-rw-r--r--net/llc/llc_output.c8
-rw-r--r--net/llc/llc_sap.c5
-rw-r--r--net/mac80211/Kconfig78
-rw-r--r--net/mac80211/Makefile20
-rw-r--r--net/mac80211/aes_ccm.c155
-rw-r--r--net/mac80211/aes_ccm.h26
-rw-r--r--net/mac80211/debugfs.c433
-rw-r--r--net/mac80211/debugfs.h16
-rw-r--r--net/mac80211/debugfs_key.c252
-rw-r--r--net/mac80211/debugfs_key.h34
-rw-r--r--net/mac80211/debugfs_netdev.c440
-rw-r--r--net/mac80211/debugfs_netdev.h30
-rw-r--r--net/mac80211/debugfs_sta.c246
-rw-r--r--net/mac80211/debugfs_sta.h12
-rw-r--r--net/mac80211/hostapd_ioctl.h108
-rw-r--r--net/mac80211/ieee80211.c4984
-rw-r--r--net/mac80211/ieee80211_cfg.c66
-rw-r--r--net/mac80211/ieee80211_cfg.h9
-rw-r--r--net/mac80211/ieee80211_common.h98
-rw-r--r--net/mac80211/ieee80211_i.h798
-rw-r--r--net/mac80211/ieee80211_iface.c352
-rw-r--r--net/mac80211/ieee80211_ioctl.c1822
-rw-r--r--net/mac80211/ieee80211_key.h106
-rw-r--r--net/mac80211/ieee80211_led.c91
-rw-r--r--net/mac80211/ieee80211_led.h32
-rw-r--r--net/mac80211/ieee80211_rate.c140
-rw-r--r--net/mac80211/ieee80211_rate.h144
-rw-r--r--net/mac80211/ieee80211_sta.c3060
-rw-r--r--net/mac80211/michael.c104
-rw-r--r--net/mac80211/michael.h20
-rw-r--r--net/mac80211/rc80211_simple.c432
-rw-r--r--net/mac80211/sta_info.c470
-rw-r--r--net/mac80211/sta_info.h164
-rw-r--r--net/mac80211/tkip.c341
-rw-r--r--net/mac80211/tkip.h36
-rw-r--r--net/mac80211/wep.c328
-rw-r--r--net/mac80211/wep.h40
-rw-r--r--net/mac80211/wme.c678
-rw-r--r--net/mac80211/wme.h57
-rw-r--r--net/mac80211/wpa.c660
-rw-r--r--net/mac80211/wpa.h31
-rw-r--r--net/netfilter/Kconfig88
-rw-r--r--net/netfilter/core.c21
-rw-r--r--net/netfilter/nf_conntrack_core.c58
-rw-r--r--net/netfilter/nf_conntrack_ecache.c23
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nf_conntrack_ftp.c6
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c66
-rw-r--r--net/netfilter/nf_conntrack_proto.c144
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c9
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c88
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c5
-rw-r--r--net/netfilter/nf_conntrack_standalone.c11
-rw-r--r--net/netfilter/nfnetlink.c197
-rw-r--r--net/netfilter/nfnetlink_log.c108
-rw-r--r--net/netfilter/nfnetlink_queue.c20
-rw-r--r--net/netfilter/x_tables.c26
-rw-r--r--net/netfilter/xt_CONNMARK.c32
-rw-r--r--net/netfilter/xt_CONNSECMARK.c18
-rw-r--r--net/netfilter/xt_DSCP.c10
-rw-r--r--net/netfilter/xt_NOTRACK.c4
-rw-r--r--net/netfilter/xt_TCPMSS.c12
-rw-r--r--net/netfilter/xt_connbytes.c35
-rw-r--r--net/netfilter/xt_connmark.c17
-rw-r--r--net/netfilter/xt_conntrack.c110
-rw-r--r--net/netfilter/xt_dscp.c6
-rw-r--r--net/netfilter/xt_hashlimit.c14
-rw-r--r--net/netfilter/xt_helper.c60
-rw-r--r--net/netfilter/xt_length.c5
-rw-r--r--net/netfilter/xt_limit.c7
-rw-r--r--net/netfilter/xt_mac.c4
-rw-r--r--net/netfilter/xt_pkttype.c2
-rw-r--r--net/netfilter/xt_realm.c2
-rw-r--r--net/netfilter/xt_state.c4
-rw-r--r--net/netlabel/netlabel_kapi.c3
-rw-r--r--net/netlink/af_netlink.c121
-rw-r--r--net/netlink/attr.c5
-rw-r--r--net/netlink/genetlink.c66
-rw-r--r--net/netrom/af_netrom.c115
-rw-r--r--net/netrom/nr_dev.c4
-rw-r--r--net/netrom/nr_in.c6
-rw-r--r--net/netrom/nr_loopback.c4
-rw-r--r--net/netrom/nr_out.c8
-rw-r--r--net/netrom/nr_route.c5
-rw-r--r--net/netrom/nr_subr.c4
-rw-r--r--net/packet/af_packet.c94
-rw-r--r--net/rfkill/Kconfig24
-rw-r--r--net/rfkill/Makefile6
-rw-r--r--net/rfkill/rfkill-input.c174
-rw-r--r--net/rfkill/rfkill.c407
-rw-r--r--net/rose/af_rose.c144
-rw-r--r--net/rose/rose_loopback.c2
-rw-r--r--net/rose/rose_route.c10
-rw-r--r--net/rxrpc/Kconfig43
-rw-r--r--net/rxrpc/Makefile40
-rw-r--r--net/rxrpc/af_rxrpc.c879
-rw-r--r--net/rxrpc/ar-accept.c504
-rw-r--r--net/rxrpc/ar-ack.c1306
-rw-r--r--net/rxrpc/ar-call.c804
-rw-r--r--net/rxrpc/ar-connection.c911
-rw-r--r--net/rxrpc/ar-connevent.c403
-rw-r--r--net/rxrpc/ar-error.c255
-rw-r--r--net/rxrpc/ar-input.c797
-rw-r--r--net/rxrpc/ar-internal.h808
-rw-r--r--net/rxrpc/ar-key.c334
-rw-r--r--net/rxrpc/ar-local.c309
-rw-r--r--net/rxrpc/ar-output.c734
-rw-r--r--net/rxrpc/ar-peer.c316
-rw-r--r--net/rxrpc/ar-proc.c247
-rw-r--r--net/rxrpc/ar-recvmsg.c437
-rw-r--r--net/rxrpc/ar-security.c258
-rw-r--r--net/rxrpc/ar-skbuff.c132
-rw-r--r--net/rxrpc/ar-transport.c276
-rw-r--r--net/rxrpc/call.c2277
-rw-r--r--net/rxrpc/connection.c777
-rw-r--r--net/rxrpc/internal.h106
-rw-r--r--net/rxrpc/krxiod.c262
-rw-r--r--net/rxrpc/krxsecd.c270
-rw-r--r--net/rxrpc/krxtimod.c204
-rw-r--r--net/rxrpc/main.c180
-rw-r--r--net/rxrpc/peer.c398
-rw-r--r--net/rxrpc/proc.c617
-rw-r--r--net/rxrpc/rxkad.c1154
-rw-r--r--net/rxrpc/rxrpc_syms.c34
-rw-r--r--net/rxrpc/sysctl.c121
-rw-r--r--net/rxrpc/transport.c846
-rw-r--r--net/sched/Kconfig56
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c81
-rw-r--r--net/sched/act_gact.c5
-rw-r--r--net/sched/act_ipt.c5
-rw-r--r--net/sched/act_mirred.c7
-rw-r--r--net/sched/act_pedit.c7
-rw-r--r--net/sched/act_police.c34
-rw-r--r--net/sched/act_simple.c5
-rw-r--r--net/sched/cls_api.c36
-rw-r--r--net/sched/cls_basic.c24
-rw-r--r--net/sched/cls_fw.c7
-rw-r--r--net/sched/cls_route.c13
-rw-r--r--net/sched/cls_rsvp.c1
-rw-r--r--net/sched/cls_rsvp.h12
-rw-r--r--net/sched/cls_rsvp6.c1
-rw-r--r--net/sched/cls_tcindex.c13
-rw-r--r--net/sched/cls_u32.c13
-rw-r--r--net/sched/em_u32.c2
-rw-r--r--net/sched/ematch.c17
-rw-r--r--net/sched/sch_api.c234
-rw-r--r--net/sched/sch_atm.c28
-rw-r--r--net/sched/sch_cbq.c207
-rw-r--r--net/sched/sch_dsmark.c22
-rw-r--r--net/sched/sch_generic.c35
-rw-r--r--net/sched/sch_hfsc.c113
-rw-r--r--net/sched/sch_htb.c136
-rw-r--r--net/sched/sch_ingress.c27
-rw-r--r--net/sched/sch_netem.c108
-rw-r--r--net/sched/sch_prio.c14
-rw-r--r--net/sched/sch_sfq.c9
-rw-r--r--net/sched/sch_tbf.c47
-rw-r--r--net/sched/sch_teql.c2
-rw-r--r--net/sctp/associola.c58
-rw-r--r--net/sctp/debug.c5
-rw-r--r--net/sctp/input.c51
-rw-r--r--net/sctp/inqueue.c8
-rw-r--r--net/sctp/ipv6.c85
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c12
-rw-r--r--net/sctp/protocol.c101
-rw-r--r--net/sctp/sm_make_chunk.c27
-rw-r--r--net/sctp/sm_sideeffect.c51
-rw-r--r--net/sctp/sm_statefuns.c77
-rw-r--r--net/sctp/sm_statetable.c2
-rw-r--r--net/sctp/socket.c457
-rw-r--r--net/sctp/transport.c34
-rw-r--r--net/sctp/ulpevent.c49
-rw-r--r--net/sctp/ulpqueue.c168
-rw-r--r--net/socket.c63
-rw-r--r--net/sunrpc/Makefile2
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_seal.c13
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/clnt.c73
-rw-r--r--net/sunrpc/pmap_clnt.c383
-rw-r--r--net/sunrpc/rpc_pipe.c3
-rw-r--r--net/sunrpc/rpcb_clnt.c625
-rw-r--r--net/sunrpc/sched.c65
-rw-r--r--net/sunrpc/socklib.c2
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svcauth_unix.c21
-rw-r--r--net/sunrpc/svcsock.c30
-rw-r--r--net/sunrpc/xprt.c14
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/tipc/Kconfig2
-rw-r--r--net/tipc/config.c2
-rw-r--r--net/tipc/eth_media.c20
-rw-r--r--net/tipc/link.c48
-rw-r--r--net/tipc/msg.h18
-rw-r--r--net/tipc/netlink.c2
-rw-r--r--net/tipc/port.c8
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/unix/af_unix.c3
-rw-r--r--net/wanrouter/af_wanpipe.c2600
-rw-r--r--net/wanrouter/wanmain.c6
-rw-r--r--net/wireless/Kconfig16
-rw-r--r--net/wireless/Makefile4
-rw-r--r--net/wireless/core.c224
-rw-r--r--net/wireless/core.h49
-rw-r--r--net/wireless/sysfs.c80
-rw-r--r--net/wireless/sysfs.h9
-rw-r--r--net/wireless/wext.c (renamed from net/core/wireless.c)1142
-rw-r--r--net/x25/af_x25.c23
-rw-r--r--net/x25/x25_dev.c4
-rw-r--r--net/x25/x25_forward.c88
-rw-r--r--net/x25/x25_in.c14
-rw-r--r--net/x25/x25_out.c6
-rw-r--r--net/xfrm/xfrm_algo.c169
-rw-r--r--net/xfrm/xfrm_input.c6
-rw-r--r--net/xfrm/xfrm_policy.c87
-rw-r--r--net/xfrm/xfrm_state.c69
-rw-r--r--net/xfrm/xfrm_user.c242
551 files changed, 41693 insertions, 36213 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c
index ace6386384bc..91dde41b5481 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -100,7 +100,7 @@ static int fddi_rebuild_header(struct sk_buff *skb)
struct fddihdr *fddi = (struct fddihdr *)skb->data;
#ifdef CONFIG_INET
- if (fddi->hdr.llc_snap.ethertype == __constant_htons(ETH_P_IP))
+ if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP))
/* Try to get ARP to resolve the header and fill destination address */
return arp_find(fddi->daddr, skb);
else
@@ -130,12 +130,13 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
* to start of packet data. Assume 802.2 SNAP frames for now.
*/
- skb->mac.raw = skb->data; /* point to frame control (FC) */
+ skb->dev = dev;
+ skb_reset_mac_header(skb); /* point to frame control (FC) */
if(fddi->hdr.llc_8022_1.dsap==0xe0)
{
skb_pull(skb, FDDI_K_8022_HLEN-3);
- type = __constant_htons(ETH_P_802_2);
+ type = htons(ETH_P_802_2);
}
else
{
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 578f2a3d692d..87ffc12b6891 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -60,7 +60,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
* Due to the stupidity of the little endian byte-order we
* have to set the fp field this way.
*/
- hip->fp.fixed = __constant_htonl(0x04800018);
+ hip->fp.fixed = htonl(0x04800018);
hip->fp.d2_size = htonl(len + 8);
hip->le.fc = 0;
hip->le.double_wide = 0; /* only HIPPI 800 for the time being */
@@ -104,7 +104,7 @@ static int hippi_rebuild_header(struct sk_buff *skb)
* Only IP is currently supported
*/
- if(hip->snap.ethertype != __constant_htons(ETH_P_IP))
+ if(hip->snap.ethertype != htons(ETH_P_IP))
{
printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype));
return 0;
@@ -126,14 +126,14 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
{
struct hippi_hdr *hip;
- hip = (struct hippi_hdr *) skb->data;
-
/*
* This is actually wrong ... question is if we really should
* set the raw address here.
*/
- skb->mac.raw = skb->data;
- skb_pull(skb, HIPPI_HLEN);
+ skb->dev = dev;
+ skb_reset_mac_header(skb);
+ hip = (struct hippi_hdr *)skb_mac_header(skb);
+ skb_pull(skb, HIPPI_HLEN);
/*
* No fancy promisc stuff here now.
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 6e7c2120b83f..04ee43e7538f 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -56,10 +56,10 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
};
rcu_read_lock();
- proto = find_snap_client(skb->h.raw);
+ proto = find_snap_client(skb_transport_header(skb));
if (proto) {
/* Pass the frame on. */
- skb->h.raw += 5;
+ skb->transport_header += 5;
skb_pull_rcsum(skb, 5);
rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev);
} else {
diff --git a/net/802/tr.c b/net/802/tr.c
index 96bd14452c55..0ba1946211c9 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -189,11 +189,13 @@ static int tr_rebuild_header(struct sk_buff *skb)
__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
{
- struct trh_hdr *trh=(struct trh_hdr *)skb->data;
+ struct trh_hdr *trh;
struct trllc *trllc;
unsigned riflen=0;
- skb->mac.raw = skb->data;
+ skb->dev = dev;
+ skb_reset_mac_header(skb);
+ trh = tr_hdr(skb);
if(trh->saddr[0] & TR_RII)
riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
@@ -552,7 +554,8 @@ static int rif_seq_show(struct seq_file *seq, void *v)
if(j==1) {
segment=ntohs(entry->rseg[j-1])>>4;
seq_printf(seq," %03X",segment);
- };
+ }
+
segment=ntohs(entry->rseg[j])>>4;
brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index eb1c71ed7dfe..bd93c45778d4 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -117,8 +117,7 @@ static void __exit vlan_cleanup_devices(void)
struct net_device *dev, *nxt;
rtnl_lock();
- for (dev = dev_base; dev; dev = nxt) {
- nxt = dev->next;
+ for_each_netdev_safe(dev, nxt) {
if (dev->priv_flags & IFF_802_1Q_VLAN) {
unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
VLAN_DEV_INFO(dev)->vlan_id);
@@ -470,7 +469,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
*/
default:
snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID);
- };
+ }
new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
vlan_setup);
@@ -685,7 +684,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
break;
}
break;
- };
+ }
out:
return NOTIFY_DONE;
@@ -819,7 +818,7 @@ static int vlan_ioctl_handler(void __user *arg)
printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
__FUNCTION__, args.cmd);
return -EINVAL;
- };
+ }
out:
return err;
}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2fc8fe2cb366..ec46084f44b4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -66,7 +66,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
break;
- };
+ }
return 0;
}
@@ -83,7 +83,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
/* Lifted from Gleb's VLAN code... */
memmove(skb->data - ETH_HLEN,
skb->data - VLAN_ETH_HLEN, 12);
- skb->mac.raw += VLAN_HLEN;
+ skb->mac_header += VLAN_HLEN;
}
}
@@ -219,7 +219,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
break;
default:
break;
- };
+ }
/* Was a VLAN packet, grab the encapsulated protocol, which the layer
* three protocols care about.
@@ -258,7 +258,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
* won't work for fault tolerant netware but does for the rest.
*/
if (*(unsigned short *)rawp == 0xFFFF) {
- skb->protocol = __constant_htons(ETH_P_802_3);
+ skb->protocol = htons(ETH_P_802_3);
/* place it back on the queue to be handled by true layer 3 protocols.
*/
@@ -281,7 +281,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
/*
* Real 802.2 LLC
*/
- skb->protocol = __constant_htons(ETH_P_802_2);
+ skb->protocol = htons(ETH_P_802_2);
/* place it back on the queue to be handled by upper layer protocols.
*/
@@ -380,6 +380,9 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
} else {
vhdr->h_vlan_encapsulated_proto = htons(len);
}
+
+ skb->protocol = htons(ETH_P_8021Q);
+ skb_reset_network_header(skb);
}
/* Before delegating work to the lower layer, enter our MAC-address */
@@ -445,7 +448,7 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
- if (veth->h_vlan_proto != __constant_htons(ETH_P_8021Q)) {
+ if (veth->h_vlan_proto != htons(ETH_P_8021Q)) {
int orig_headroom = skb_headroom(skb);
unsigned short veth_TCI;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 5e24f72602a1..d216a64421cd 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -237,13 +237,9 @@ int vlan_proc_rem_dev(struct net_device *vlandev)
* The following few functions build the content of /proc/net/vlan/config
*/
-/* starting at dev, find a VLAN device */
-static struct net_device *vlan_skip(struct net_device *dev)
+static inline int is_vlan_dev(struct net_device *dev)
{
- while (dev && !(dev->priv_flags & IFF_802_1Q_VLAN))
- dev = dev->next;
-
- return dev;
+ return dev->priv_flags & IFF_802_1Q_VLAN;
}
/* start read of /proc/net/vlan/config */
@@ -257,19 +253,35 @@ static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
if (*pos == 0)
return SEQ_START_TOKEN;
- for (dev = vlan_skip(dev_base); dev && i < *pos;
- dev = vlan_skip(dev->next), ++i);
+ for_each_netdev(dev) {
+ if (!is_vlan_dev(dev))
+ continue;
+
+ if (i++ == *pos)
+ return dev;
+ }
- return (i == *pos) ? dev : NULL;
+ return NULL;
}
static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net_device *dev;
+
++*pos;
- return vlan_skip((v == SEQ_START_TOKEN)
- ? dev_base
- : ((struct net_device *)v)->next);
+ dev = (struct net_device *)v;
+ if (v == SEQ_START_TOKEN)
+ dev = net_device_entry(&dev_base_head);
+
+ for_each_netdev_continue(dev) {
+ if (!is_vlan_dev(dev))
+ continue;
+
+ return dev;
+ }
+
+ return NULL;
}
static void vlan_seq_stop(struct seq_file *seq, void *v)
diff --git a/net/Kconfig b/net/Kconfig
index 915657832d94..caeacd16656a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -27,13 +27,6 @@ if NET
menu "Networking options"
-config NETDEBUG
- bool "Network packet debugging"
- help
- You can say Y here if you want to get additional messages useful in
- debugging bad packets, but can overwhelm logs under denial of service
- attacks.
-
source "net/packet/Kconfig"
source "net/unix/Kconfig"
source "net/xfrm/Kconfig"
@@ -219,14 +212,21 @@ endmenu
source "net/ax25/Kconfig"
source "net/irda/Kconfig"
source "net/bluetooth/Kconfig"
-source "net/ieee80211/Kconfig"
-
-config WIRELESS_EXT
- bool
+source "net/rxrpc/Kconfig"
config FIB_RULES
bool
+menu "Wireless"
+
+source "net/wireless/Kconfig"
+source "net/mac80211/Kconfig"
+source "net/ieee80211/Kconfig"
+
+endmenu
+
+source "net/rfkill/Kconfig"
+
endif # if NET
endmenu # Networking
diff --git a/net/Makefile b/net/Makefile
index 4854ac506313..34e5b2d7f877 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -38,16 +38,20 @@ obj-$(CONFIG_IRDA) += irda/
obj-$(CONFIG_BT) += bluetooth/
obj-$(CONFIG_SUNRPC) += sunrpc/
obj-$(CONFIG_RXRPC) += rxrpc/
+obj-$(CONFIG_AF_RXRPC) += rxrpc/
obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_DECNET) += decnet/
obj-$(CONFIG_ECONET) += econet/
obj-$(CONFIG_VLAN_8021Q) += 8021q/
obj-$(CONFIG_IP_DCCP) += dccp/
obj-$(CONFIG_IP_SCTP) += sctp/
+obj-y += wireless/
+obj-$(CONFIG_MAC80211) += mac80211/
obj-$(CONFIG_IEEE80211) += ieee80211/
obj-$(CONFIG_TIPC) += tipc/
obj-$(CONFIG_NETLABEL) += netlabel/
obj-$(CONFIG_IUCV) += iucv/
+obj-$(CONFIG_RFKILL) += rfkill/
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_SYSCTL) += sysctl_net.o
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d89d62f3702f..5ef6a238bdbc 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -118,7 +118,9 @@ static void __aarp_send_query(struct aarp_entry *a)
/* Set up the buffer */
skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
- skb->nh.raw = skb->h.raw = skb_put(skb, sizeof(*eah));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_put(skb, sizeof(*eah));
skb->protocol = htons(ETH_P_ATALK);
skb->dev = dev;
eah = aarp_hdr(skb);
@@ -163,7 +165,9 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
/* Set up the buffer */
skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
- skb->nh.raw = skb->h.raw = skb_put(skb, sizeof(*eah));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_put(skb, sizeof(*eah));
skb->protocol = htons(ETH_P_ATALK);
skb->dev = dev;
eah = aarp_hdr(skb);
@@ -212,7 +216,9 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us)
/* Set up the buffer */
skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
- skb->nh.raw = skb->h.raw = skb_put(skb, sizeof(*eah));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_put(skb, sizeof(*eah));
skb->protocol = htons(ETH_P_ATALK);
skb->dev = dev;
eah = aarp_hdr(skb);
@@ -539,7 +545,7 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
int hash;
struct aarp_entry *a;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* Check for LocalTalk first */
if (dev->type == ARPHRD_LOCALTLK) {
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 113c175f1715..fbdfb1224ae1 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1275,7 +1275,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IP);
skb_pull(skb, 13);
skb->dev = dev;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
stats = dev->priv;
stats->rx_packets++;
@@ -1383,10 +1383,10 @@ free_it:
* @pt - packet type
*
* Receive a packet (in skb) from device dev. This has come from the SNAP
- * decoder, and on entry skb->h.raw is the DDP header, skb->len is the DDP
- * header, skb->len is the DDP length. The physical headers have been
- * extracted. PPP should probably pass frames marked as for this layer.
- * [ie ARPHRD_ETHERTALK]
+ * decoder, and on entry skb->transport_header is the DDP header and
+ * skb->len is the DDP length. The physical headers have been
+ * extracted. PPP should probably pass frames marked as for this
+ * layer. [ie ARPHRD_ETHERTALK]
*/
static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
@@ -1417,10 +1417,13 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
/*
* Size check to see if ddp->deh_len was crap
* (Otherwise we'll detonate most spectacularly
- * in the middle of recvmsg()).
+ * in the middle of atalk_checksum() or recvmsg()).
*/
- if (skb->len < sizeof(*ddp))
+ if (skb->len < sizeof(*ddp) || skb->len < (len_hops & 1023)) {
+ pr_debug("AppleTalk: dropping corrupted frame (deh_len=%u, "
+ "skb->len=%u)\n", len_hops & 1023, skb->len);
goto freeit;
+ }
/*
* Any checksums. Note we don't do htons() on this == is assumed to be
@@ -1481,7 +1484,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
/* Expand any short form frames */
- if (skb->mac.raw[2] == 1) {
+ if (skb_mac_header(skb)[2] == 1) {
struct ddpehdr *ddp;
/* Find our address */
struct atalk_addr *ap = atalk_find_dev_addr(dev);
@@ -1507,8 +1510,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
* we write the network numbers !
*/
- ddp->deh_dnode = skb->mac.raw[0]; /* From physical header */
- ddp->deh_snode = skb->mac.raw[1]; /* From physical header */
+ ddp->deh_dnode = skb_mac_header(skb)[0]; /* From physical header */
+ ddp->deh_snode = skb_mac_header(skb)[1]; /* From physical header */
ddp->deh_dnet = ap->s_net; /* Network number */
ddp->deh_snet = ap->s_net;
@@ -1519,7 +1522,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
/* Non routable, so force a drop if we slip up later */
ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10));
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
return atalk_rcv(skb, dev, pt, orig_dev);
freeit:
@@ -1768,6 +1771,9 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP:
rc = sock_get_timestamp(sk, argp);
break;
+ case SIOCGSTAMPNS:
+ rc = sock_get_timestampns(sk, argp);
+ break;
/* Routing */
case SIOCADDRT:
case SIOCDELRT:
@@ -1838,7 +1844,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = {
.sendpage = sock_no_sendpage,
};
-#include <linux/smp_lock.h>
SOCKOPS_WRAP(atalk_dgram, PF_APPLETALK);
static struct notifier_block ddp_notifier = {
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ec4ebd3299e3..0e9f00c5c899 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -173,7 +173,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
}
skb_push(skb, minheadroom);
if (brvcc->encaps == e_llc)
- memcpy(skb->data, llc_oui_pid_pad, 10);
+ skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10);
else
memset(skb->data, 0, 2);
#endif /* FASTER_VERSION */
@@ -375,11 +375,11 @@ packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
{
if (brvcc->filter.netmask == 0)
return 0; /* no filter in place */
- if (type == __constant_htons(ETH_P_IP) &&
+ if (type == htons(ETH_P_IP) &&
(((struct iphdr *) (skb->data))->daddr & brvcc->filter.
netmask) == brvcc->filter.prefix)
return 0;
- if (type == __constant_htons(ETH_P_ARP))
+ if (type == htons(ETH_P_ARP))
return 0;
/* TODO: we should probably filter ARPs too.. don't want to have
* them returning values that don't make sense, or is that ok?
@@ -458,7 +458,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
/* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
than should be. What else should I set? */
skb_pull(skb, plen);
- skb->mac.raw = ((char *) (skb->data)) - ETH_HLEN;
+ skb_set_mac_header(skb, -ETH_HLEN);
skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_BR2684_FAST_TRANS
skb->protocol = ((u16 *) skb->data)[-1];
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ebb5d0ce8b6f..876b77f14745 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -213,7 +213,7 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
return;
}
ATM_SKB(skb)->vcc = vcc;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
if (!clip_vcc->encap
|| skb->len < RFC1483LLC_LEN
|| memcmp(skb->data, llc_oui, sizeof (llc_oui)))
@@ -261,14 +261,6 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb)
spin_unlock_irqrestore(&PRIV(dev)->xoff_lock, flags);
}
-static void clip_neigh_destroy(struct neighbour *neigh)
-{
- DPRINTK("clip_neigh_destroy (neigh %p)\n", neigh);
- if (NEIGH2ENTRY(neigh)->vccs)
- printk(KERN_CRIT "clip_neigh_destroy: vccs != NULL !!!\n");
- NEIGH2ENTRY(neigh)->vccs = (void *) NEIGHBOR_DEAD;
-}
-
static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
DPRINTK("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb);
@@ -342,7 +334,6 @@ static struct neigh_table clip_tbl = {
/* parameters are copied from ARP ... */
.parms = {
.tbl = &clip_tbl,
- .neigh_destructor = clip_neigh_destroy,
.base_reachable_time = 30 * HZ,
.retrans_time = 1 * HZ,
.gc_staletime = 60 * HZ,
@@ -711,7 +702,7 @@ static struct atm_dev atmarpd_dev = {
.ops = &atmarpd_dev_ops,
.type = "arpd",
.number = 999,
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(atmarpd_dev.lock)
};
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 8ccee4591f65..7afd8e7754fd 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -82,6 +82,9 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP: /* borrowed from IP */
error = sock_get_timestamp(sk, argp);
goto done;
+ case SIOCGSTAMPNS: /* borrowed from IP */
+ error = sock_get_timestampns(sk, argp);
+ goto done;
case ATM_SETSC:
printk(KERN_WARNING "ATM_SETSC is obsolete\n");
error = 0;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 3d804d61f656..4dc5f2b8c43c 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -283,8 +283,8 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
- (long)skb->head, (long)skb->data, (long)skb->tail,
- (long)skb->end);
+ (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb),
+ (long)skb_end_pointer(skb));
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
lec_handle_bridge(skb, dev);
@@ -576,8 +576,8 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
break;
}
skb2->len = sizeof(struct atmlec_msg);
- memcpy(skb2->data, mesg,
- sizeof(struct atmlec_msg));
+ skb_copy_to_linear_data(skb2, mesg,
+ sizeof(*mesg));
atm_force_charge(priv->lecd, skb2->truesize);
sk = sk_atm(priv->lecd);
skb_queue_tail(&sk->sk_receive_queue, skb2);
@@ -630,7 +630,7 @@ static struct atm_dev lecatm_dev = {
.ops = &lecdev_ops,
.type = "lec",
.number = 999, /* dummy device number */
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(lecatm_dev.lock)
};
/*
@@ -825,7 +825,6 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
if (!hlist_empty(&priv->lec_arp_empty_ones)) {
lec_arp_check_empties(priv, vcc, skb);
}
- skb->dev = dev;
skb_pull(skb, 2); /* skip lec_id */
#ifdef CONFIG_TR
if (priv->is_trdev)
@@ -1338,7 +1337,7 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
if (skb == NULL)
return -1;
skb->len = *sizeoftlvs;
- memcpy(skb->data, *tlvs, *sizeoftlvs);
+ skb_copy_to_linear_data(skb, *tlvs, *sizeoftlvs);
retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb);
}
return retval;
@@ -1372,7 +1371,7 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
if (skb == NULL)
return 0;
skb->len = sizeoftlvs;
- memcpy(skb->data, tlvs, sizeoftlvs);
+ skb_copy_to_linear_data(skb, tlvs, sizeoftlvs);
retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb);
if (retval != 0)
printk("lec.c: lane2_associate_req() failed\n");
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index cb3c004ff022..7c85aa551d5e 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -504,11 +504,13 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
tagged_llc_snap_hdr.tag = entry->ctrl_info.tag;
skb_pull(skb, ETH_HLEN); /* get rid of Eth header */
skb_push(skb, sizeof(tagged_llc_snap_hdr)); /* add LLC/SNAP header */
- memcpy(skb->data, &tagged_llc_snap_hdr, sizeof(tagged_llc_snap_hdr));
+ skb_copy_to_linear_data(skb, &tagged_llc_snap_hdr,
+ sizeof(tagged_llc_snap_hdr));
} else {
skb_pull(skb, ETH_HLEN); /* get rid of Eth header */
skb_push(skb, sizeof(struct llc_snap_hdr)); /* add LLC/SNAP header + tag */
- memcpy(skb->data, &llc_snap_mpoa_data, sizeof(struct llc_snap_hdr));
+ skb_copy_to_linear_data(skb, &llc_snap_mpoa_data,
+ sizeof(struct llc_snap_hdr));
}
atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
@@ -711,11 +713,12 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
return;
}
skb_push(new_skb, eg->ctrl_info.DH_length); /* add MAC header */
- memcpy(new_skb->data, eg->ctrl_info.DLL_header, eg->ctrl_info.DH_length);
+ skb_copy_to_linear_data(new_skb, eg->ctrl_info.DLL_header,
+ eg->ctrl_info.DH_length);
new_skb->protocol = eth_type_trans(new_skb, dev);
- new_skb->nh.raw = new_skb->data;
+ skb_reset_network_header(new_skb);
- eg->latest_ip_addr = new_skb->nh.iph->saddr;
+ eg->latest_ip_addr = ip_hdr(new_skb)->saddr;
eg->packets_rcvd++;
mpc->eg_ops->put(eg);
@@ -734,7 +737,7 @@ static struct atm_dev mpc_dev = {
.ops = &mpc_ops,
.type = "mpc",
.number = 42,
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(mpc_dev.lock)
/* members not explicitly initialised will be 0 */
};
@@ -936,7 +939,7 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
if (skb == NULL)
return -ENOMEM;
skb_put(skb, sizeof(struct k_message));
- memcpy(skb->data, mesg, sizeof(struct k_message));
+ skb_copy_to_linear_data(skb, mesg, sizeof(*mesg));
atm_force_charge(mpc->mpoad_vcc, skb->truesize);
sk = sk_atm(mpc->mpoad_vcc);
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 31d98b57e1de..d14baaf1f4c3 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -256,7 +256,7 @@ static struct atm_dev sigd_dev = {
.ops = &sigd_dev_ops,
.type = "sig",
.number = 999,
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(sigd_dev.lock)
};
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index a8993a041724..43dd86fca4d3 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -1,30 +1,27 @@
#
# Amateur Radio protocols and AX.25 device configuration
#
-# 19971130 Now in an own category to make correct compilation of the
-# AX.25 stuff easier...
-# Joerg Reuter DL1BKE <jreuter@yaina.de>
-# 19980129 Moved to net/ax25/Config.in, sourcing device drivers.
menuconfig HAMRADIO
depends on NET
bool "Amateur Radio support"
help
If you want to connect your Linux box to an amateur radio, answer Y
- here. You want to read <http://www.tapr.org/tapr/html/pkthome.html> and
- the AX25-HOWTO, available from <http://www.tldp.org/docs.html#howto>.
+ here. You want to read <http://www.tapr.org/tapr/html/pkthome.html>
+ and, for AX.25 on Linux specifically,
+ <http://www.linux-ax25.org/>.
Note that the answer to this question won't directly affect the
kernel: saying N will just cause the configurator to skip all
the questions about amateur radio.
comment "Packet Radio protocols"
- depends on HAMRADIO && NET
+ depends on HAMRADIO
config AX25
tristate "Amateur Radio AX.25 Level 2 protocol"
- depends on HAMRADIO && NET
- ---help---
+ depends on HAMRADIO
+ help
This is the protocol used for computer communication over amateur
radio. It is either used by itself for point-to-point links, or to
carry other protocols such as tcp/ip. To use it, you need a device
@@ -52,6 +49,7 @@ config AX25
config AX25_DAMA_SLAVE
bool "AX.25 DAMA Slave support"
+ default y
depends on AX25
help
DAMA is a mechanism to prevent collisions when doing AX.25
@@ -59,23 +57,38 @@ config AX25_DAMA_SLAVE
from clients (called "slaves") and redistributes it to other slaves.
If you say Y here, your Linux box will act as a DAMA slave; this is
transparent in that you don't have to do any special DAMA
- configuration. (Linux cannot yet act as a DAMA server.) If unsure,
- say N.
+ configuration. Linux cannot yet act as a DAMA server. This option
+ only compiles DAMA slave support into the kernel. It still needs to
+ be enabled at runtime. For more about DAMA see
+ <http://www.linux-ax25.org>. If unsure, say Y.
+
+# placeholder until implemented
+config AX25_DAMA_MASTER
+ bool 'AX.25 DAMA Master support'
+ depends on AX25_DAMA_SLAVE && BROKEN
+ help
+ DAMA is a mechanism to prevent collisions when doing AX.25
+ networking. A DAMA server (called "master") accepts incoming traffic
+ from clients (called "slaves") and redistributes it to other slaves.
+ If you say Y here, your Linux box will act as a DAMA master; this is
+ transparent in that you don't have to do any special DAMA
+ configuration. Linux cannot yet act as a DAMA server. This option
+ only compiles DAMA master support into the kernel. It still needs to
+ be explicitly enabled, so if unsure, say Y.
-# bool ' AX.25 DAMA Master support' CONFIG_AX25_DAMA_MASTER
config NETROM
tristate "Amateur Radio NET/ROM protocol"
depends on AX25
- ---help---
+ help
NET/ROM is a network layer protocol on top of AX.25 useful for
routing.
A comprehensive listing of all the software for Linux amateur radio
users as well as information about how to configure an AX.25 port is
- contained in the AX25-HOWTO, available from
- <http://www.tldp.org/docs.html#howto>. You also might want to
- check out the file <file:Documentation/networking/ax25.txt>. More
- information about digital amateur radio in general is on the WWW at
+ contained in the Linux Ham Wiki, available from
+ <http://www.linux-ax25.org>. You also might want to check out the
+ file <file:Documentation/networking/ax25.txt>. More information about
+ digital amateur radio in general is on the WWW at
<http://www.tapr.org/tapr/html/pkthome.html>.
To compile this driver as a module, choose M here: the
@@ -84,27 +97,25 @@ config NETROM
config ROSE
tristate "Amateur Radio X.25 PLP (Rose)"
depends on AX25
- ---help---
+ help
The Packet Layer Protocol (PLP) is a way to route packets over X.25
connections in general and amateur radio AX.25 connections in
particular, essentially an alternative to NET/ROM.
A comprehensive listing of all the software for Linux amateur radio
users as well as information about how to configure an AX.25 port is
- contained in the AX25-HOWTO, available from
- <http://www.tldp.org/docs.html#howto>. You also might want to
- check out the file <file:Documentation/networking/ax25.txt>. More
- information about digital amateur radio in general is on the WWW at
+ contained in the Linux Ham Wiki, available from
+ <http://www.linux-ax25.org>. You also might want to check out the
+ file <file:Documentation/networking/ax25.txt>. More information about
+ digital amateur radio in general is on the WWW at
<http://www.tapr.org/tapr/html/pkthome.html>.
To compile this driver as a module, choose M here: the
module will be called rose.
-
menu "AX.25 network device drivers"
- depends on HAMRADIO && NET && AX25!=n
+ depends on HAMRADIO && AX25
source "drivers/net/hamradio/Kconfig"
endmenu
-
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 1c07c6a50eb8..429e13a6c6ad 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -23,7 +23,6 @@
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
-#include <linux/smp_lock.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <net/ax25.h>
@@ -1127,22 +1126,22 @@ static int __must_check ax25_connect(struct socket *sock,
switch (sk->sk_state) {
case TCP_SYN_SENT: /* still trying */
err = -EINPROGRESS;
- goto out;
+ goto out_release;
case TCP_ESTABLISHED: /* connection established */
sock->state = SS_CONNECTED;
- goto out;
+ goto out_release;
case TCP_CLOSE: /* connection refused */
sock->state = SS_UNCONNECTED;
err = -ECONNREFUSED;
- goto out;
+ goto out_release;
}
}
if (sk->sk_state == TCP_ESTABLISHED && sk->sk_type == SOCK_SEQPACKET) {
err = -EISCONN; /* No reconnect on a seqpacket socket */
- goto out;
+ goto out_release;
}
sk->sk_state = TCP_CLOSE;
@@ -1159,12 +1158,12 @@ static int __must_check ax25_connect(struct socket *sock,
/* Valid number of digipeaters ? */
if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) {
err = -EINVAL;
- goto out;
+ goto out_release;
}
if ((digi = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) {
err = -ENOBUFS;
- goto out;
+ goto out_release;
}
digi->ndigi = fsa->fsa_ax25.sax25_ndigis;
@@ -1194,7 +1193,7 @@ static int __must_check ax25_connect(struct socket *sock,
current->comm);
if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) {
kfree(digi);
- goto out;
+ goto out_release;
}
ax25_fillin_cb(ax25, ax25->ax25_dev);
@@ -1203,7 +1202,7 @@ static int __must_check ax25_connect(struct socket *sock,
if (ax25->ax25_dev == NULL) {
kfree(digi);
err = -EHOSTUNREACH;
- goto out;
+ goto out_release;
}
}
@@ -1213,7 +1212,7 @@ static int __must_check ax25_connect(struct socket *sock,
kfree(digi);
err = -EADDRINUSE; /* Already such a connection */
ax25_cb_put(ax25t);
- goto out;
+ goto out_release;
}
ax25->dest_addr = fsa->fsa_ax25.sax25_call;
@@ -1223,7 +1222,7 @@ static int __must_check ax25_connect(struct socket *sock,
if (sk->sk_type != SOCK_SEQPACKET) {
sock->state = SS_CONNECTED;
sk->sk_state = TCP_ESTABLISHED;
- goto out;
+ goto out_release;
}
/* Move to connecting socket, ax.25 lapb WAIT_UA.. */
@@ -1255,55 +1254,53 @@ static int __must_check ax25_connect(struct socket *sock,
/* Now the loop */
if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
err = -EINPROGRESS;
- goto out;
+ goto out_release;
}
if (sk->sk_state == TCP_SYN_SENT) {
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
+ DEFINE_WAIT(wait);
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
+ prepare_to_wait(sk->sk_sleep, &wait,
+ TASK_INTERRUPTIBLE);
if (sk->sk_state != TCP_SYN_SENT)
break;
- set_current_state(TASK_INTERRUPTIBLE);
- release_sock(sk);
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+
+ if (err)
+ goto out_release;
}
if (sk->sk_state != TCP_ESTABLISHED) {
/* Not in ABM, not in WAIT_UA -> failed */
sock->state = SS_UNCONNECTED;
err = sock_error(sk); /* Always set at this point */
- goto out;
+ goto out_release;
}
sock->state = SS_CONNECTED;
- err=0;
-out:
+ err = 0;
+out_release:
release_sock(sk);
return err;
}
-
static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
struct sk_buff *skb;
struct sock *newsk;
+ DEFINE_WAIT(wait);
struct sock *sk;
int err = 0;
@@ -1328,30 +1325,29 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
* The read queue this time is holding sockets ready to use
* hooked into the SABM we saved
*/
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
+ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb)
break;
- release_sock(sk);
- current->state = TASK_INTERRUPTIBLE;
if (flags & O_NONBLOCK) {
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -EWOULDBLOCK;
+ err = -EWOULDBLOCK;
+ break;
}
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+
+ if (err)
+ goto out;
newsk = skb->sk;
newsk->sk_socket = newsock;
@@ -1425,7 +1421,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
struct sockaddr_ax25 sax;
struct sk_buff *skb;
ax25_digi dtmp, *dp;
- unsigned char *asmptr;
ax25_cb *ax25;
size_t size;
int lv, err, addr_len = msg->msg_namelen;
@@ -1548,13 +1543,11 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* Add the PID if one is not supplied by the user in the skb */
- if (!ax25->pidincl) {
- asmptr = skb_push(skb, 1);
- *asmptr = sk->sk_protocol;
- }
+ if (!ax25->pidincl)
+ *skb_push(skb, 1) = sk->sk_protocol;
SOCK_DEBUG(sk, "AX.25: Transmitting buffer\n");
@@ -1573,7 +1566,7 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- asmptr = skb_push(skb, 1 + ax25_addr_size(dp));
+ skb_push(skb, 1 + ax25_addr_size(dp));
SOCK_DEBUG(sk, "Building AX.25 Header (dp=%p).\n", dp);
@@ -1581,17 +1574,17 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
SOCK_DEBUG(sk, "Num digipeaters=%d\n", dp->ndigi);
/* Build an AX.25 header */
- asmptr += (lv = ax25_addr_build(asmptr, &ax25->source_addr,
- &sax.sax25_call, dp,
- AX25_COMMAND, AX25_MODULUS));
+ lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call,
+ dp, AX25_COMMAND, AX25_MODULUS);
SOCK_DEBUG(sk, "Built header (%d bytes)\n",lv);
- skb->h.raw = asmptr;
+ skb_set_transport_header(skb, lv);
- SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, asmptr);
+ SOCK_DEBUG(sk, "base=%p pos=%p\n",
+ skb->data, skb_transport_header(skb));
- *asmptr = AX25_UI;
+ *skb_transport_header(skb) = AX25_UI;
/* Datagram frames go straight out of the door as UI */
ax25_queue_xmit(skb, ax25->ax25_dev->dev);
@@ -1631,8 +1624,8 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
if (!ax25_sk(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
- skb->h.raw = skb->data;
- copied = skb->len;
+ skb_reset_transport_header(skb);
+ copied = skb->len;
if (copied > size) {
copied = size;
@@ -1645,9 +1638,10 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
ax25_digi digi;
ax25_address src;
+ const unsigned char *mac = skb_mac_header(skb);
- ax25_addr_parse(skb->mac.raw+1, skb->data-skb->mac.raw-1, &src, NULL, &digi, NULL, NULL);
-
+ ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
+ &digi, NULL, NULL);
sax->sax25_family = AF_AX25;
/* We set this correctly, even though we may not let the
application know the digi calls further down (because it
@@ -1711,6 +1705,10 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
res = sock_get_timestamp(sk, argp);
break;
+ case SIOCGSTAMPNS:
+ res = sock_get_timestampns(sk, argp);
+ break;
+
case SIOCAX25ADDUID: /* Add a uid to the uid/call map table */
case SIOCAX25DELUID: /* Delete a uid from the uid/call map table */
case SIOCAX25GETUID: {
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index 9569dd3fa466..a49773ff2b92 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -136,7 +136,7 @@ static void ax25_kiss_cmd(ax25_dev *ax25_dev, unsigned char cmd, unsigned char p
if ((skb = alloc_skb(2, GFP_ATOMIC)) == NULL)
return;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
p = skb_put(skb, 2);
*p++ = cmd;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 4a6b26becadc..0ddaff0df217 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -61,12 +61,14 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
skb_reserve(skbn, AX25_MAX_HEADER_LEN);
skbn->dev = ax25->ax25_dev->dev;
- skbn->h.raw = skbn->data;
- skbn->nh.raw = skbn->data;
+ skb_reset_network_header(skbn);
+ skb_reset_transport_header(skbn);
/* Copy data from the fragments */
while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
- memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+ skb_copy_from_linear_data(skbo,
+ skb_put(skbn, skbo->len),
+ skbo->len);
kfree_skb(skbo);
}
@@ -122,8 +124,8 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
}
skb_pull(skb, 1); /* Remove PID */
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
skb->dev = ax25->ax25_dev->dev;
skb->pkt_type = PACKET_HOST;
skb->protocol = htons(ETH_P_IP);
@@ -196,7 +198,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
* Process the AX.25/LAPB frame.
*/
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
kfree_skb(skb);
@@ -233,7 +235,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
/* UI frame - bypass LAPB processing */
if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) {
- skb->h.raw = skb->data + 2; /* skip control and pid */
+ skb_set_transport_header(skb, 2); /* skip control and pid */
ax25_send_to_raw(&dest, skb, skb->data[1]);
@@ -246,8 +248,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
switch (skb->data[1]) {
case AX25_P_IP:
skb_pull(skb,2); /* drop PID/CTRL */
- skb->h.raw = skb->data;
- skb->nh.raw = skb->data;
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
skb->dev = dev;
skb->pkt_type = PACKET_HOST;
skb->protocol = htons(ETH_P_IP);
@@ -256,8 +258,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
case AX25_P_ARP:
skb_pull(skb,2);
- skb->h.raw = skb->data;
- skb->nh.raw = skb->data;
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
skb->dev = dev;
skb->pkt_type = PACKET_HOST;
skb->protocol = htons(ETH_P_ARP);
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 7f818bbcd1c5..930e4918037f 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -121,7 +121,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
digipeat = route->digipeat;
dev = route->dev;
ip_mode = route->ip_mode;
- };
+ }
if (dev == NULL)
dev = skb->dev;
@@ -171,7 +171,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
src_c = *(ax25_address *)(bp + 8);
skb_pull(ourskb, AX25_HEADER_LEN - 1); /* Keep PID */
- ourskb->nh.raw = ourskb->data;
+ skb_reset_network_header(ourskb);
ax25=ax25_send_frame(
ourskb,
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 223835092b7a..92b517af7260 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -148,8 +148,9 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
if (ka9qfrag == 1) {
skb_reserve(skbn, frontlen + 2);
- skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
- memcpy(skb_put(skbn, len), skb->data, len);
+ skb_set_network_header(skbn,
+ skb_network_offset(skb));
+ skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
p = skb_push(skbn, 2);
*p++ = AX25_P_SEGMENT;
@@ -161,8 +162,9 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
}
} else {
skb_reserve(skbn, frontlen + 1);
- skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
- memcpy(skb_put(skbn, len), skb->data, len);
+ skb_set_network_header(skbn,
+ skb_network_offset(skb));
+ skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
p = skb_push(skbn, 1);
*p = AX25_P_TEXT;
}
@@ -205,7 +207,7 @@ static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit)
if (skb == NULL)
return;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
if (ax25->modulus == AX25_MODULUS) {
frame = skb_push(skb, 1);
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index b6c577e3c914..5fe9b2a6697d 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -162,7 +162,7 @@ void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type)
skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* Assume a response - address structure for DTE */
if (ax25->modulus == AX25_MODULUS) {
@@ -205,7 +205,7 @@ void ax25_return_dm(struct net_device *dev, ax25_address *src, ax25_address *des
return; /* Next SABM will get DM'd */
skb_reserve(skb, dev->hard_header_len);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
ax25_digi_invert(digi, &retdigi);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index c7228cfc6218..d942b946ba07 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -221,7 +221,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
copied = len;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index b85d1492c357..1c8f4a0c5f43 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -37,7 +37,6 @@
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/errno.h>
-#include <linux/smp_lock.h>
#include <linux/net.h>
#include <net/sock.h>
@@ -326,7 +325,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
return 0;
}
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
/* Verify and pull out header */
if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK]))
@@ -364,26 +363,28 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
case BNEP_COMPRESSED_SRC_ONLY:
memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
- memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
+ memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb), ETH_ALEN);
put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
break;
case BNEP_COMPRESSED_DST_ONLY:
- memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
- memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source, ETH_ALEN + 2);
+ memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb),
+ ETH_ALEN);
+ memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source,
+ ETH_ALEN + 2);
break;
case BNEP_GENERAL:
- memcpy(__skb_put(nskb, ETH_ALEN * 2), skb->mac.raw, ETH_ALEN * 2);
+ memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb),
+ ETH_ALEN * 2);
put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
break;
}
- memcpy(__skb_put(nskb, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, __skb_put(nskb, skb->len), skb->len);
kfree_skb(skb);
s->stats.rx_packets++;
- nskb->dev = dev;
nskb->ip_summed = CHECKSUM_NONE;
nskb->protocol = eth_type_trans(nskb, dev);
netif_rx_ni(nskb);
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 3933608a9296..66bef1ccee2a 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -124,7 +124,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
}
if (skb && (skb->len > 0))
- memcpy(skb_put(nskb, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len);
memcpy(skb_put(nskb, count), buf, count);
@@ -256,7 +256,7 @@ static void cmtp_process_transmit(struct cmtp_session *session)
hdr[2] = size >> 8;
}
- memcpy(skb_put(nskb, size), skb->data, size);
+ skb_copy_from_linear_data(skb, skb_put(nskb, size), size);
skb_pull(skb, size);
if (skb->len > 0) {
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f3403fdb59f8..63980bd6b5f2 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -72,11 +72,11 @@ void hci_acl_connect(struct hci_conn *conn)
inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
cp.pscan_rep_mode = ie->data.pscan_rep_mode;
cp.pscan_mode = ie->data.pscan_mode;
- cp.clock_offset = ie->data.clock_offset | __cpu_to_le16(0x8000);
+ cp.clock_offset = ie->data.clock_offset | cpu_to_le16(0x8000);
memcpy(conn->dev_class, ie->data.dev_class, 3);
}
- cp.pkt_type = __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
+ cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
cp.role_switch = 0x01;
else
@@ -107,7 +107,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
conn->state = BT_DISCONN;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
cp.reason = reason;
hci_send_cmd(conn->hdev, OGF_LINK_CTL,
OCF_DISCONNECT, sizeof(cp), &cp);
@@ -123,8 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
conn->state = BT_CONNECT;
conn->out = 1;
- cp.pkt_type = __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
- cp.handle = __cpu_to_le16(handle);
+ cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+ cp.handle = cpu_to_le16(handle);
hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp);
}
@@ -348,7 +348,7 @@ int hci_conn_auth(struct hci_conn *conn)
if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
struct hci_cp_auth_requested cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_AUTH_REQUESTED, sizeof(cp), &cp);
}
return 0;
@@ -368,7 +368,7 @@ int hci_conn_encrypt(struct hci_conn *conn)
if (hci_conn_auth(conn)) {
struct hci_cp_set_conn_encrypt cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
cp.encrypt = 1;
hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
}
@@ -383,7 +383,7 @@ int hci_conn_change_link_key(struct hci_conn *conn)
if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
struct hci_cp_change_conn_link_key cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp);
}
return 0;
@@ -423,7 +423,7 @@ void hci_conn_enter_active_mode(struct hci_conn *conn)
if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
struct hci_cp_exit_sniff_mode cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
hci_send_cmd(hdev, OGF_LINK_POLICY,
OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp);
}
@@ -452,21 +452,21 @@ void hci_conn_enter_sniff_mode(struct hci_conn *conn)
if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) {
struct hci_cp_sniff_subrate cp;
- cp.handle = __cpu_to_le16(conn->handle);
- cp.max_latency = __constant_cpu_to_le16(0);
- cp.min_remote_timeout = __constant_cpu_to_le16(0);
- cp.min_local_timeout = __constant_cpu_to_le16(0);
+ cp.handle = cpu_to_le16(conn->handle);
+ cp.max_latency = cpu_to_le16(0);
+ cp.min_remote_timeout = cpu_to_le16(0);
+ cp.min_local_timeout = cpu_to_le16(0);
hci_send_cmd(hdev, OGF_LINK_POLICY,
OCF_SNIFF_SUBRATE, sizeof(cp), &cp);
}
if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
struct hci_cp_sniff_mode cp;
- cp.handle = __cpu_to_le16(conn->handle);
- cp.max_interval = __cpu_to_le16(hdev->sniff_max_interval);
- cp.min_interval = __cpu_to_le16(hdev->sniff_min_interval);
- cp.attempt = __constant_cpu_to_le16(4);
- cp.timeout = __constant_cpu_to_le16(1);
+ cp.handle = cpu_to_le16(conn->handle);
+ cp.max_interval = cpu_to_le16(hdev->sniff_max_interval);
+ cp.min_interval = cpu_to_le16(hdev->sniff_min_interval);
+ cp.attempt = cpu_to_le16(4);
+ cp.timeout = cpu_to_le16(1);
hci_send_cmd(hdev, OGF_LINK_POLICY,
OCF_SNIFF_MODE, sizeof(cp), &cp);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 4917919d86a6..aa4b56a8c3ea 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -149,7 +149,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
default:
err = -ETIMEDOUT;
break;
- };
+ }
hdev->req_status = hdev->req_result = 0;
@@ -216,10 +216,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
/* Host buffer size */
{
struct hci_cp_host_buffer_size cp;
- cp.acl_mtu = __cpu_to_le16(HCI_MAX_ACL_SIZE);
+ cp.acl_mtu = cpu_to_le16(HCI_MAX_ACL_SIZE);
cp.sco_mtu = HCI_MAX_SCO_SIZE;
- cp.acl_max_pkt = __cpu_to_le16(0xffff);
- cp.sco_max_pkt = __cpu_to_le16(0xffff);
+ cp.acl_max_pkt = cpu_to_le16(0xffff);
+ cp.sco_max_pkt = cpu_to_le16(0xffff);
hci_send_cmd(hdev, OGF_HOST_CTL, OCF_HOST_BUFFER_SIZE, sizeof(cp), &cp);
}
#endif
@@ -240,11 +240,11 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
}
/* Page timeout ~20 secs */
- param = __cpu_to_le16(0x8000);
+ param = cpu_to_le16(0x8000);
hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_PG_TIMEOUT, 2, &param);
/* Connection accept timeout ~20 secs */
- param = __cpu_to_le16(0x7d00);
+ param = cpu_to_le16(0x7d00);
hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_CA_TIMEOUT, 2, &param);
}
@@ -1034,7 +1034,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
}
hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
- hdr->opcode = __cpu_to_le16(hci_opcode_pack(ogf, ocf));
+ hdr->opcode = cpu_to_le16(hci_opcode_pack(ogf, ocf));
hdr->plen = plen;
if (plen)
@@ -1060,7 +1060,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
hdr = (void *) hdev->sent_cmd->data;
- if (hdr->opcode != __cpu_to_le16(hci_opcode_pack(ogf, ocf)))
+ if (hdr->opcode != cpu_to_le16(hci_opcode_pack(ogf, ocf)))
return NULL;
BT_DBG("%s ogf 0x%x ocf 0x%x", hdev->name, ogf, ocf);
@@ -1074,11 +1074,11 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
struct hci_acl_hdr *hdr;
int len = skb->len;
- hdr = (struct hci_acl_hdr *) skb_push(skb, HCI_ACL_HDR_SIZE);
- hdr->handle = __cpu_to_le16(hci_handle_pack(handle, flags));
- hdr->dlen = __cpu_to_le16(len);
-
- skb->h.raw = (void *) hdr;
+ skb_push(skb, HCI_ACL_HDR_SIZE);
+ skb_reset_transport_header(skb);
+ hdr = (struct hci_acl_hdr *)skb_transport_header(skb);
+ hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
+ hdr->dlen = cpu_to_le16(len);
}
int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
@@ -1140,11 +1140,12 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
return -EINVAL;
}
- hdr.handle = __cpu_to_le16(conn->handle);
+ hdr.handle = cpu_to_le16(conn->handle);
hdr.dlen = skb->len;
- skb->h.raw = skb_push(skb, HCI_SCO_HDR_SIZE);
- memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE);
+ skb_push(skb, HCI_SCO_HDR_SIZE);
+ skb_reset_transport_header(skb);
+ memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE);
skb->dev = (void *) hdev;
bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
@@ -1387,7 +1388,7 @@ static void hci_rx_task(unsigned long arg)
case HCI_SCODATA_PKT:
kfree_skb(skb);
continue;
- };
+ }
}
/* Process frame */
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 936d3fc479cd..447ba7131220 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -783,7 +783,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
if (conn->type == ACL_LINK && hdev->link_policy) {
struct hci_cp_write_link_policy cp;
cp.handle = ev->handle;
- cp.policy = __cpu_to_le16(hdev->link_policy);
+ cp.policy = cpu_to_le16(hdev->link_policy);
hci_send_cmd(hdev, OGF_LINK_POLICY,
OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
}
@@ -793,8 +793,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
struct hci_cp_change_conn_ptype cp;
cp.handle = ev->handle;
cp.pkt_type = (conn->type == ACL_LINK) ?
- __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
- __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+ cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
+ cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
hci_send_cmd(hdev, OGF_LINK_CTL,
OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
@@ -970,7 +970,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
if (!ev->status) {
struct hci_cp_set_conn_encrypt cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
cp.encrypt = 1;
hci_send_cmd(conn->hdev, OGF_LINK_CTL,
OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 71f5cfbbebb8..bfc9a35bad33 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -375,7 +375,7 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
copied = len;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
hci_sock_cmsg(sk, msg, skb);
@@ -499,6 +499,15 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, char
break;
case HCI_FILTER:
+ {
+ struct hci_filter *f = &hci_pi(sk)->filter;
+
+ uf.type_mask = f->type_mask;
+ uf.opcode = f->opcode;
+ uf.event_mask[0] = *((u32 *) f->event_mask + 0);
+ uf.event_mask[1] = *((u32 *) f->event_mask + 1);
+ }
+
len = min_t(unsigned int, len, sizeof(uf));
if (copy_from_user(&uf, optval, len)) {
err = -EFAULT;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 801d687ea4ef..25835403d659 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -305,7 +305,7 @@ int hci_register_sysfs(struct hci_dev *hdev)
BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
- dev->class = bt_class;
+ dev->bus = &bt_bus;
dev->parent = hdev->parent;
strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE);
@@ -322,6 +322,10 @@ int hci_register_sysfs(struct hci_dev *hdev)
if (device_create_file(dev, bt_attrs[i]) < 0)
BT_ERR("Failed to create device attribute");
+ if (sysfs_create_link(&bt_class->subsys.kobj,
+ &dev->kobj, kobject_name(&dev->kobj)) < 0)
+ BT_ERR("Failed to create class symlink");
+
return 0;
}
@@ -329,6 +333,9 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
{
BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
+ sysfs_remove_link(&bt_class->subsys.kobj,
+ kobject_name(&hdev->dev.kobj));
+
device_del(&hdev->dev);
}
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 4c914df5fd06..d342e89b8bdd 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -319,7 +319,7 @@ static int __hidp_send_ctrl_message(struct hidp_session *session,
return 0;
}
-static int inline hidp_send_ctrl_message(struct hidp_session *session,
+static inline int hidp_send_ctrl_message(struct hidp_session *session,
unsigned char hdr, unsigned char *data, int size)
{
int err;
@@ -679,6 +679,27 @@ static void hidp_close(struct hid_device *hid)
{
}
+static const struct {
+ __u16 idVendor;
+ __u16 idProduct;
+ unsigned quirks;
+} hidp_blacklist[] = {
+ /* Apple wireless Mighty Mouse */
+ { 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL },
+
+ { } /* Terminating entry */
+};
+
+static void hidp_setup_quirks(struct hid_device *hid)
+{
+ unsigned int n;
+
+ for (n = 0; hidp_blacklist[n].idVendor; n++)
+ if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) &&
+ hidp_blacklist[n].idProduct == le16_to_cpu(hid->product))
+ hid->quirks = hidp_blacklist[n].quirks;
+}
+
static inline void hidp_setup_hid(struct hidp_session *session, struct hidp_connadd_req *req)
{
struct hid_device *hid = session->hid;
@@ -708,6 +729,8 @@ static inline void hidp_setup_hid(struct hidp_session *session, struct hidp_conn
hid->hidinput_input_event = hidp_hidinput_event;
+ hidp_setup_quirks(hid);
+
list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list)
hidp_send_report(session, report);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e83ee82440d3..a59b1fb63b76 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -459,8 +459,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
sk->sk_state = BT_DISCONN;
l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
- req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+ req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
l2cap_send_cmd(conn, l2cap_get_ident(conn),
L2CAP_DISCONN_REQ, sizeof(req), &req);
} else {
@@ -652,7 +652,7 @@ static int l2cap_do_connect(struct sock *sk)
if (sk->sk_type == SOCK_SEQPACKET) {
struct l2cap_conn_req req;
l2cap_pi(sk)->ident = l2cap_get_ident(conn);
- req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+ req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
req.psm = l2cap_pi(sk)->psm;
l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
L2CAP_CONN_REQ, sizeof(req), &req);
@@ -868,8 +868,8 @@ static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len)
/* Create L2CAP header */
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
- lh->cid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- lh->len = __cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
+ lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
if (sk->sk_type == SOCK_DGRAM)
put_unaligned(l2cap_pi(sk)->psm, (u16 *) skb_put(skb, 2));
@@ -954,11 +954,17 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
switch (optname) {
case L2CAP_OPTIONS:
+ opts.imtu = l2cap_pi(sk)->imtu;
+ opts.omtu = l2cap_pi(sk)->omtu;
+ opts.flush_to = l2cap_pi(sk)->flush_to;
+ opts.mode = 0x00;
+
len = min_t(unsigned int, sizeof(opts), optlen);
if (copy_from_user((char *) &opts, optval, len)) {
err = -EFAULT;
break;
}
+
l2cap_pi(sk)->imtu = opts.imtu;
l2cap_pi(sk)->omtu = opts.omtu;
break;
@@ -1096,7 +1102,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
} else if (sk->sk_state == BT_CONNECT) {
struct l2cap_conn_req req;
l2cap_pi(sk)->ident = l2cap_get_ident(conn);
- req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+ req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
req.psm = l2cap_pi(sk)->psm;
l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req);
}
@@ -1192,13 +1198,13 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
return NULL;
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
- lh->len = __cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
- lh->cid = __cpu_to_le16(0x0001);
+ lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
+ lh->cid = cpu_to_le16(0x0001);
cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
cmd->code = code;
cmd->ident = ident;
- cmd->len = __cpu_to_le16(dlen);
+ cmd->len = cpu_to_le16(dlen);
if (dlen) {
count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE;
@@ -1316,11 +1322,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
break;
case 2:
- *((u16 *) opt->val) = __cpu_to_le16(val);
+ *((u16 *) opt->val) = cpu_to_le16(val);
break;
case 4:
- *((u32 *) opt->val) = __cpu_to_le32(val);
+ *((u32 *) opt->val) = cpu_to_le32(val);
break;
default:
@@ -1346,8 +1352,8 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
//if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
// l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
- req->dcid = __cpu_to_le16(pi->dcid);
- req->flags = __cpu_to_le16(0);
+ req->dcid = cpu_to_le16(pi->dcid);
+ req->flags = cpu_to_le16(0);
return ptr - data;
}
@@ -1383,9 +1389,9 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, int *result)
else
flags = 0x0001;
- rsp->scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- rsp->result = __cpu_to_le16(result ? *result : 0);
- rsp->flags = __cpu_to_le16(flags);
+ rsp->scid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ rsp->result = cpu_to_le16(result ? *result : 0);
+ rsp->flags = cpu_to_le16(flags);
return ptr - data;
}
@@ -1470,10 +1476,10 @@ response:
bh_unlock_sock(parent);
sendresp:
- rsp.scid = __cpu_to_le16(scid);
- rsp.dcid = __cpu_to_le16(dcid);
- rsp.result = __cpu_to_le16(result);
- rsp.status = __cpu_to_le16(status);
+ rsp.scid = cpu_to_le16(scid);
+ rsp.dcid = cpu_to_le16(dcid);
+ rsp.result = cpu_to_le16(result);
+ rsp.status = cpu_to_le16(status);
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
return 0;
}
@@ -1613,8 +1619,8 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
l2cap_sock_set_timer(sk, HZ * 5);
{
struct l2cap_disconn_req req;
- req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+ req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
l2cap_send_cmd(conn, l2cap_get_ident(conn),
L2CAP_DISCONN_REQ, sizeof(req), &req);
}
@@ -1652,8 +1658,8 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid)))
return 0;
- rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid);
- rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
+ rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+ rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
sk->sk_shutdown = SHUTDOWN_MASK;
@@ -1696,8 +1702,8 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
BT_DBG("type 0x%4.4x", type);
- rsp.type = __cpu_to_le16(type);
- rsp.result = __cpu_to_le16(L2CAP_IR_NOTSUPP);
+ rsp.type = cpu_to_le16(type);
+ rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP);
l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp);
return 0;
@@ -1794,7 +1800,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
BT_DBG("error %d", err);
/* FIXME: Map err to a valid reason */
- rej.reason = __cpu_to_le16(0);
+ rej.reason = cpu_to_le16(0);
l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
}
@@ -1993,10 +1999,10 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
result = L2CAP_CR_SEC_BLOCK;
}
- rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid);
- rsp.result = __cpu_to_le16(result);
- rsp.status = __cpu_to_le16(0);
+ rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+ rsp.result = cpu_to_le16(result);
+ rsp.status = cpu_to_le16(0);
l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
L2CAP_CONN_RSP, sizeof(rsp), &rsp);
@@ -2041,10 +2047,10 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
result = L2CAP_CR_SEC_BLOCK;
}
- rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid);
- rsp.result = __cpu_to_le16(result);
- rsp.status = __cpu_to_le16(0);
+ rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+ rsp.result = cpu_to_le16(result);
+ rsp.status = cpu_to_le16(0);
l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
L2CAP_CONN_RSP, sizeof(rsp), &rsp);
@@ -2107,7 +2113,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
if (!(conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC)))
goto drop;
- memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+ skb->len);
conn->rx_len = len - skb->len;
} else {
BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
@@ -2128,7 +2135,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
goto drop;
}
- memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+ skb->len);
conn->rx_len -= skb->len;
if (!conn->rx_len) {
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 94f457360560..52e04df323ea 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -622,7 +622,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
bacpy(&addr.l2_bdaddr, src);
addr.l2_family = AF_BLUETOOTH;
addr.l2_psm = 0;
- *err = sock->ops->bind(sock, (struct sockaddr *) &addr, sizeof(addr));
+ *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr));
if (*err < 0)
goto failed;
@@ -643,7 +643,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
bacpy(&addr.l2_bdaddr, dst);
addr.l2_family = AF_BLUETOOTH;
addr.l2_psm = htobs(RFCOMM_PSM);
- *err = sock->ops->connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK);
+ *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK);
if (*err == 0 || *err == -EINPROGRESS)
return s;
@@ -1058,6 +1058,12 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
case BT_DISCONN:
d->state = BT_CLOSED;
__rfcomm_dlc_close(d, 0);
+
+ if (list_empty(&s->dlcs)) {
+ s->state = BT_DISCONN;
+ rfcomm_send_disc(s, 0);
+ }
+
break;
}
} else {
@@ -1067,6 +1073,10 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
s->state = BT_CONNECTED;
rfcomm_process_connect(s);
break;
+
+ case BT_DISCONN:
+ rfcomm_session_put(s);
+ break;
}
}
return 0;
@@ -1567,7 +1577,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
/* Trim FCS */
skb->len--; skb->tail--;
- fcs = *(u8 *) skb->tail;
+ fcs = *(u8 *)skb_tail_pointer(skb);
if (__check_fcs(skb->data, type, fcs)) {
BT_ERR("bad checksum in packet");
@@ -1757,19 +1767,12 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s)
BT_DBG("session %p", s);
- if (sock_create_lite(PF_BLUETOOTH, sock->type, BTPROTO_L2CAP, &nsock))
+ err = kernel_accept(sock, &nsock, O_NONBLOCK);
+ if (err < 0)
return;
- nsock->ops = sock->ops;
-
__module_get(nsock->ops->owner);
- err = sock->ops->accept(sock, nsock, O_NONBLOCK);
- if (err < 0) {
- sock_release(nsock);
- return;
- }
-
/* Set our callbacks */
nsock->sk->sk_data_ready = rfcomm_l2data_ready;
nsock->sk->sk_state_change = rfcomm_l2state_change;
@@ -1851,18 +1854,18 @@ static void rfcomm_worker(void)
BT_DBG("");
while (!atomic_read(&terminate)) {
+ set_current_state(TASK_INTERRUPTIBLE);
if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) {
/* No pending events. Let's sleep.
* Incoming connections and data will wake us up. */
- set_current_state(TASK_INTERRUPTIBLE);
schedule();
}
+ set_current_state(TASK_RUNNING);
/* Process stuff */
clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
rfcomm_process_sessions();
}
- set_current_state(TASK_RUNNING);
return;
}
@@ -1885,7 +1888,7 @@ static int rfcomm_add_listener(bdaddr_t *ba)
bacpy(&addr.l2_bdaddr, ba);
addr.l2_family = AF_BLUETOOTH;
addr.l2_psm = htobs(RFCOMM_PSM);
- err = sock->ops->bind(sock, (struct sockaddr *) &addr, sizeof(addr));
+ err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr));
if (err < 0) {
BT_ERR("Bind failed %d", err);
goto failed;
@@ -1898,7 +1901,7 @@ static int rfcomm_add_listener(bdaddr_t *ba)
release_sock(sk);
/* Start listening on the socket */
- err = sock->ops->listen(sock, 10);
+ err = kernel_listen(sock, 10);
if (err) {
BT_ERR("Listen failed %d", err);
goto failed;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 9a7a44fc721f..b2b1cceb102a 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -517,9 +517,10 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
if (dlc->state == BT_CLOSED) {
if (!dev->tty) {
if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
- rfcomm_dev_hold(dev);
- rfcomm_dev_del(dev);
+ if (rfcomm_dev_get(dev->id) == NULL)
+ return;
+ rfcomm_dev_del(dev);
/* We have to drop DLC lock here, otherwise
rfcomm_dev_put() will dead lock if it's
the last reference. */
@@ -974,8 +975,12 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
rfcomm_tty_flush_buffer(tty);
- if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags))
+ if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
+ if (rfcomm_dev_get(dev->id) == NULL)
+ return;
rfcomm_dev_del(dev);
+ rfcomm_dev_put(dev);
+ }
}
static int rfcomm_tty_read_proc(char *buf, char **start, off_t offset, int len, int *eof, void *unused)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index ae4391440950..3f5163e725ed 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -393,7 +393,7 @@ static void sco_sock_close(struct sock *sk)
default:
sock_set_flag(sk, SOCK_ZAPPED);
break;
- };
+ }
release_sock(sk);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 2994387999a8..848b8fa8bedd 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -37,7 +37,9 @@ static int __init br_init(void)
return -EADDRINUSE;
}
- br_fdb_init();
+ err = br_fdb_init();
+ if (err)
+ goto err_out1;
err = br_netfilter_init();
if (err)
@@ -47,7 +49,10 @@ static int __init br_init(void)
if (err)
goto err_out2;
- br_netlink_init();
+ err = br_netlink_init();
+ if (err)
+ goto err_out3;
+
brioctl_set(br_ioctl_deviceless_stub);
br_handle_frame_hook = br_handle_frame;
@@ -55,7 +60,8 @@ static int __init br_init(void)
br_fdb_put_hook = br_fdb_put;
return 0;
-
+err_out3:
+ unregister_netdevice_notifier(&br_device_notifier);
err_out2:
br_netfilter_fini();
err_out1:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 905a39c33a16..5e1892d8d874 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -37,7 +37,7 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
br->statistics.tx_packets++;
br->statistics.tx_bytes += skb->len;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
if (dest[0] & 1)
@@ -83,27 +83,21 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-/* Allow setting mac address of pseudo-bridge to be same as
- * any of the bound interfaces
- */
+/* Allow setting mac address to any valid ethernet address. */
static int br_set_mac_address(struct net_device *dev, void *p)
{
struct net_bridge *br = netdev_priv(dev);
struct sockaddr *addr = p;
- struct net_bridge_port *port;
- int err = -EADDRNOTAVAIL;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EINVAL;
spin_lock_bh(&br->lock);
- list_for_each_entry(port, &br->port_list, list) {
- if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) {
- br_stp_change_bridge_id(br, addr->sa_data);
- err = 0;
- break;
- }
- }
+ memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+ br_stp_change_bridge_id(br, addr->sa_data);
spin_unlock_bh(&br->lock);
- return err;
+ return 0;
}
static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index def2e403f932..91b017016d5b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -20,19 +20,28 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/jhash.h>
+#include <linux/random.h>
#include <asm/atomic.h>
+#include <asm/unaligned.h>
#include "br_private.h"
static struct kmem_cache *br_fdb_cache __read_mostly;
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr);
-void __init br_fdb_init(void)
+static u32 fdb_salt __read_mostly;
+
+int __init br_fdb_init(void)
{
br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
sizeof(struct net_bridge_fdb_entry),
0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!br_fdb_cache)
+ return -ENOMEM;
+
+ get_random_bytes(&fdb_salt, sizeof(fdb_salt));
+ return 0;
}
void __exit br_fdb_fini(void)
@@ -44,24 +53,26 @@ void __exit br_fdb_fini(void)
/* if topology_changing then use forward_delay (default 15 sec)
* otherwise keep longer (default 5 minutes)
*/
-static __inline__ unsigned long hold_time(const struct net_bridge *br)
+static inline unsigned long hold_time(const struct net_bridge *br)
{
return br->topology_change ? br->forward_delay : br->ageing_time;
}
-static __inline__ int has_expired(const struct net_bridge *br,
+static inline int has_expired(const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb)
{
return !fdb->is_static
&& time_before_eq(fdb->ageing_timer + hold_time(br), jiffies);
}
-static __inline__ int br_mac_hash(const unsigned char *mac)
+static inline int br_mac_hash(const unsigned char *mac)
{
- return jhash(mac, ETH_ALEN, 0) & (BR_HASH_SIZE - 1);
+ /* use 1 byte of OUI and 3 bytes of NIC */
+ u32 key = get_unaligned((u32 *)(mac + 2));
+ return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1);
}
-static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f)
+static inline void fdb_delete(struct net_bridge_fdb_entry *f)
{
hlist_del_rcu(&f->hlist);
br_fdb_put(f);
@@ -128,7 +139,26 @@ void br_fdb_cleanup(unsigned long _data)
mod_timer(&br->gc_timer, jiffies + HZ/10);
}
+/* Completely flush all dynamic entries in forwarding database.*/
+void br_fdb_flush(struct net_bridge *br)
+{
+ int i;
+ spin_lock_bh(&br->hash_lock);
+ for (i = 0; i < BR_HASH_SIZE; i++) {
+ struct net_bridge_fdb_entry *f;
+ struct hlist_node *h, *n;
+ hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
+ if (!f->is_static)
+ fdb_delete(f);
+ }
+ }
+ spin_unlock_bh(&br->hash_lock);
+}
+
+/* Flush all entries referring to a specific port.
+ * if do_all is set also flush static entries
+ */
void br_fdb_delete_by_port(struct net_bridge *br,
const struct net_bridge_port *p,
int do_all)
@@ -197,8 +227,8 @@ struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br,
rcu_read_lock();
fdb = __br_fdb_get(br, addr);
- if (fdb)
- atomic_inc(&fdb->use_count);
+ if (fdb && !atomic_inc_not_zero(&fdb->use_count))
+ fdb = NULL;
rcu_read_unlock();
return fdb;
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 3e45c1a1aa96..ada7f495445c 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -71,7 +71,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
indev = skb->dev;
skb->dev = to->dev;
- skb->ip_summed = CHECKSUM_NONE;
+ skb_forward_csum(skb);
NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
br_forward_finish);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3a2e29be40c..849deaf14108 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -152,6 +152,8 @@ static void del_nbp(struct net_bridge_port *p)
br_stp_disable_port(p);
spin_unlock_bh(&br->lock);
+ br_ifinfo_notify(RTM_DELLINK, p);
+
br_fdb_delete_by_port(br, p, 1);
list_del_rcu(&p->list);
@@ -203,7 +205,7 @@ static struct net_device *new_bridge_dev(const char *name)
memcpy(br->group_addr, br_group_address, ETH_ALEN);
br->feature_mask = dev->features;
- br->stp_enabled = 0;
+ br->stp_enabled = BR_NO_STP;
br->designated_root = br->bridge_id;
br->root_path_cost = 0;
br->root_port = 0;
@@ -434,6 +436,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
br_stp_enable_port(p);
spin_unlock_bh(&br->lock);
+ br_ifinfo_notify(RTM_NEWLINK, p);
+
dev_set_mtu(br->dev, br_min_mtu(br));
kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -471,11 +475,9 @@ void __exit br_cleanup_bridges(void)
struct net_device *dev, *nxt;
rtnl_lock();
- for (dev = dev_base; dev; dev = nxt) {
- nxt = dev->next;
+ for_each_netdev_safe(dev, nxt)
if (dev->priv_flags & IFF_EBRIDGE)
del_br(dev->priv);
- }
rtnl_unlock();
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 35b94f9a1ac5..420bbb9955e9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -112,46 +112,59 @@ static int br_handle_local_finish(struct sk_buff *skb)
*/
static inline int is_link_local(const unsigned char *dest)
{
- return memcmp(dest, br_group_address, 5) == 0 && (dest[5] & 0xf0) == 0;
+ const u16 *a = (const u16 *) dest;
+ static const u16 *const b = (const u16 *const ) br_group_address;
+ static const u16 m = __constant_cpu_to_be16(0xfff0);
+
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
}
/*
* Called via br_handle_frame_hook.
- * Return 0 if *pskb should be processed furthur
- * 1 if *pskb is handled
+ * Return NULL if skb is handled
* note: already called with rcu_read_lock (preempt_disabled)
*/
-int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
+struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
const unsigned char *dest = eth_hdr(skb)->h_dest;
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
- goto err;
+ goto drop;
if (unlikely(is_link_local(dest))) {
- skb->pkt_type = PACKET_HOST;
- return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
- NULL, br_handle_local_finish) != 0;
+ /* Pause frames shouldn't be passed up by driver anyway */
+ if (skb->protocol == htons(ETH_P_PAUSE))
+ goto drop;
+
+ /* Process STP BPDU's through normal netif_receive_skb() path */
+ if (p->br->stp_enabled != BR_NO_STP) {
+ if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
+ NULL, br_handle_local_finish))
+ return NULL;
+ else
+ return skb;
+ }
}
- if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
+ switch (p->state) {
+ case BR_STATE_FORWARDING:
+
if (br_should_route_hook) {
- if (br_should_route_hook(pskb))
- return 0;
- skb = *pskb;
+ if (br_should_route_hook(&skb))
+ return skb;
dest = eth_hdr(skb)->h_dest;
}
-
+ /* fall through */
+ case BR_STATE_LEARNING:
if (!compare_ether_addr(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish);
- return 1;
+ break;
+ default:
+drop:
+ kfree_skb(skb);
}
-
-err:
- kfree_skb(skb);
- return 1;
+ return NULL;
}
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 147015fe5c75..bb15e9e259b1 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -27,7 +27,9 @@ static int get_bridge_ifindices(int *indices, int num)
struct net_device *dev;
int i = 0;
- for (dev = dev_base; dev && i < num; dev = dev->next) {
+ for_each_netdev(dev) {
+ if (i >= num)
+ break;
if (dev->priv_flags & IFF_EBRIDGE)
indices[i++] = dev->ifindex;
}
@@ -137,7 +139,8 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
b.topology_change = br->topology_change;
b.topology_change_detected = br->topology_change_detected;
b.root_port = br->root_port;
- b.stp_enabled = br->stp_enabled;
+
+ b.stp_enabled = (br->stp_enabled != BR_NO_STP);
b.ageing_time = jiffies_to_clock_t(br->ageing_time);
b.hello_timer_value = br_timer_value(&br->hello_timer);
b.tcn_timer_value = br_timer_value(&br->tcn_timer);
@@ -251,7 +254,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- br->stp_enabled = args[1]?1:0;
+ br_stp_set_enabled(br, args[1]);
return 0;
case BRCTL_SET_BRIDGE_PRIORITY:
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 7712d76f06ba..fa779874b9dd 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -29,6 +29,8 @@
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
@@ -48,8 +50,8 @@
#define skb_origaddr(skb) (((struct bridge_skb_cb *) \
(skb->nf_bridge->data))->daddr.ipv4)
-#define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr)
-#define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr)
+#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr)
+#define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr)
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *brnf_sysctl_header;
@@ -57,11 +59,13 @@ static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
static int brnf_filter_vlan_tagged __read_mostly = 1;
+static int brnf_filter_pppoe_tagged __read_mostly = 1;
#else
#define brnf_filter_vlan_tagged 1
+#define brnf_filter_pppoe_tagged 1
#endif
-static __be16 inline vlan_proto(const struct sk_buff *skb)
+static inline __be16 vlan_proto(const struct sk_buff *skb)
{
return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
}
@@ -81,6 +85,22 @@ static __be16 inline vlan_proto(const struct sk_buff *skb)
vlan_proto(skb) == htons(ETH_P_ARP) && \
brnf_filter_vlan_tagged)
+static inline __be16 pppoe_proto(const struct sk_buff *skb)
+{
+ return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+ sizeof(struct pppoe_hdr)));
+}
+
+#define IS_PPPOE_IP(skb) \
+ (skb->protocol == htons(ETH_P_PPP_SES) && \
+ pppoe_proto(skb) == htons(PPP_IP) && \
+ brnf_filter_pppoe_tagged)
+
+#define IS_PPPOE_IPV6(skb) \
+ (skb->protocol == htons(ETH_P_PPP_SES) && \
+ pppoe_proto(skb) == htons(PPP_IPV6) && \
+ brnf_filter_pppoe_tagged)
+
/* We need these fake structures to make netfilter happy --
* lots of places assume that skb->dst != NULL, which isn't
* all that unreasonable.
@@ -122,14 +142,36 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
return skb->nf_bridge;
}
-static inline void nf_bridge_save_header(struct sk_buff *skb)
+static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
+{
+ unsigned int len = nf_bridge_encap_header_len(skb);
+
+ skb_push(skb, len);
+ skb->network_header -= len;
+}
+
+static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
{
- int header_size = ETH_HLEN;
+ unsigned int len = nf_bridge_encap_header_len(skb);
- if (skb->protocol == htons(ETH_P_8021Q))
- header_size += VLAN_HLEN;
+ skb_pull(skb, len);
+ skb->network_header += len;
+}
+
+static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
+{
+ unsigned int len = nf_bridge_encap_header_len(skb);
+
+ skb_pull_rcsum(skb, len);
+ skb->network_header += len;
+}
- memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
+static inline void nf_bridge_save_header(struct sk_buff *skb)
+{
+ int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+
+ skb_copy_from_linear_data_offset(skb, -header_size,
+ skb->nf_bridge->data, header_size);
}
/*
@@ -139,19 +181,15 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
int nf_bridge_copy_header(struct sk_buff *skb)
{
int err;
- int header_size = ETH_HLEN;
-
- if (skb->protocol == htons(ETH_P_8021Q))
- header_size += VLAN_HLEN;
+ int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
err = skb_cow(skb, header_size);
if (err)
return err;
- memcpy(skb->data - header_size, skb->nf_bridge->data, header_size);
-
- if (skb->protocol == htons(ETH_P_8021Q))
- __skb_push(skb, VLAN_HLEN);
+ skb_copy_to_linear_data_offset(skb, -header_size,
+ skb->nf_bridge->data, header_size);
+ __skb_push(skb, nf_bridge_encap_header_len(skb));
return 0;
}
@@ -172,10 +210,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
dst_hold(skb->dst);
skb->dev = nf_bridge->physindev;
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish, 1);
@@ -253,10 +288,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
if (!skb->dev)
kfree_skb(skb);
else {
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull(skb, VLAN_HLEN);
- skb->nh.raw += VLAN_HLEN;
- }
+ nf_bridge_pull_encap_header(skb);
skb->dst->output(skb);
}
return 0;
@@ -265,7 +297,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
static int br_nf_pre_routing_finish(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
int err;
@@ -322,11 +354,7 @@ bridged_dnat:
* bridged frame */
nf_bridge->mask |= BRNF_BRIDGED_DNAT;
skb->dev = nf_bridge->physindev;
- if (skb->protocol ==
- htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
skb, skb->dev, NULL,
br_nf_pre_routing_finish_bridge,
@@ -342,10 +370,7 @@ bridged_dnat:
}
skb->dev = nf_bridge->physindev;
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish, 1);
@@ -372,9 +397,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
static int check_hbh_len(struct sk_buff *skb)
{
- unsigned char *raw = (u8 *) (skb->nh.ipv6h + 1);
+ unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
u32 pkt_len;
- int off = raw - skb->nh.raw;
+ const unsigned char *nh = skb_network_header(skb);
+ int off = raw - nh;
int len = (raw[1] + 1) << 3;
if ((raw + len) - skb->data > skb_headlen(skb))
@@ -384,9 +410,9 @@ static int check_hbh_len(struct sk_buff *skb)
len -= 2;
while (len > 0) {
- int optlen = skb->nh.raw[off + 1] + 2;
+ int optlen = nh[off + 1] + 2;
- switch (skb->nh.raw[off]) {
+ switch (nh[off]) {
case IPV6_TLV_PAD0:
optlen = 1;
break;
@@ -395,17 +421,18 @@ static int check_hbh_len(struct sk_buff *skb)
break;
case IPV6_TLV_JUMBO:
- if (skb->nh.raw[off + 1] != 4 || (off & 3) != 2)
+ if (nh[off + 1] != 4 || (off & 3) != 2)
goto bad;
- pkt_len = ntohl(*(__be32 *) (skb->nh.raw + off + 2));
+ pkt_len = ntohl(*(__be32 *) (nh + off + 2));
if (pkt_len <= IPV6_MAXPLEN ||
- skb->nh.ipv6h->payload_len)
+ ipv6_hdr(skb)->payload_len)
goto bad;
if (pkt_len > skb->len - sizeof(struct ipv6hdr))
goto bad;
if (pskb_trim_rcsum(skb,
pkt_len + sizeof(struct ipv6hdr)))
goto bad;
+ nh = skb_network_header(skb);
break;
default:
if (optlen > len)
@@ -439,7 +466,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto inhdr_error;
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
if (hdr->version != 6)
goto inhdr_error;
@@ -485,18 +512,15 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
__u32 len;
struct sk_buff *skb = *pskb;
- if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb)) {
+ if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
+ IS_PPPOE_IPV6(skb)) {
#ifdef CONFIG_SYSCTL
if (!brnf_call_ip6tables)
return NF_ACCEPT;
#endif
if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
goto out;
-
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull_rcsum(skb, VLAN_HLEN);
- skb->nh.raw += VLAN_HLEN;
- }
+ nf_bridge_pull_encap_header_rcsum(skb);
return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
}
#ifdef CONFIG_SYSCTL
@@ -504,28 +528,25 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
return NF_ACCEPT;
#endif
- if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb))
+ if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
+ !IS_PPPOE_IP(skb))
return NF_ACCEPT;
if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
goto out;
-
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull_rcsum(skb, VLAN_HLEN);
- skb->nh.raw += VLAN_HLEN;
- }
+ nf_bridge_pull_encap_header_rcsum(skb);
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (iph->ihl < 5 || iph->version != 4)
goto inhdr_error;
if (!pskb_may_pull(skb, 4 * iph->ihl))
goto inhdr_error;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
goto inhdr_error;
@@ -591,10 +612,7 @@ static int br_nf_forward_finish(struct sk_buff *skb)
} else {
in = *((struct net_device **)(skb->cb));
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
skb->dev, br_forward_finish, 1);
return 0;
@@ -622,15 +640,13 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
if (!parent)
return NF_DROP;
- if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+ if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
+ IS_PPPOE_IP(skb))
pf = PF_INET;
else
pf = PF_INET6;
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull(*pskb, VLAN_HLEN);
- (*pskb)->nh.raw += VLAN_HLEN;
- }
+ nf_bridge_pull_encap_header(*pskb);
nf_bridge = skb->nf_bridge;
if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -664,15 +680,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
if (skb->protocol != htons(ETH_P_ARP)) {
if (!IS_VLAN_ARP(skb))
return NF_ACCEPT;
- skb_pull(*pskb, VLAN_HLEN);
- (*pskb)->nh.raw += VLAN_HLEN;
+ nf_bridge_pull_encap_header(*pskb);
}
- if (skb->nh.arph->ar_pln != 4) {
- if (IS_VLAN_ARP(skb)) {
- skb_push(*pskb, VLAN_HLEN);
- (*pskb)->nh.raw -= VLAN_HLEN;
- }
+ if (arp_hdr(skb)->ar_pln != 4) {
+ if (IS_VLAN_ARP(skb))
+ nf_bridge_push_encap_header(*pskb);
return NF_ACCEPT;
}
*d = (struct net_device *)in;
@@ -719,10 +732,7 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->mask ^= BRNF_PKT_TYPE;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
br_forward_finish);
@@ -753,7 +763,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
#ifdef CONFIG_NETFILTER_DEBUG
/* Be very paranoid. This probably won't happen anymore, but let's
* keep the check just to be sure... */
- if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
+ if (skb_mac_header(skb) < skb->head ||
+ skb_mac_header(skb) + ETH_HLEN > skb->data) {
printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
"bad mac.raw pointer.\n");
goto print_error;
@@ -766,7 +777,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
if (!realoutdev)
return NF_DROP;
- if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+ if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
+ IS_PPPOE_IP(skb))
pf = PF_INET;
else
pf = PF_INET6;
@@ -785,11 +797,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
nf_bridge->mask |= BRNF_PKT_TYPE;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull(skb, VLAN_HLEN);
- skb->nh.raw += VLAN_HLEN;
- }
-
+ nf_bridge_pull_encap_header(skb);
nf_bridge_save_header(skb);
#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
@@ -808,7 +816,7 @@ print_error:
if (realoutdev)
printk("[%s]", realoutdev->name);
}
- printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
+ printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
skb->data);
dump_stack();
return NF_ACCEPT;
@@ -925,6 +933,14 @@ static ctl_table brnf_table[] = {
.mode = 0644,
.proc_handler = &brnf_sysctl_call_tables,
},
+ {
+ .ctl_name = NET_BRIDGE_NF_FILTER_PPPOE_TAGGED,
+ .procname = "bridge-nf-filter-pppoe-tagged",
+ .data = &brnf_filter_pppoe_tagged,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &brnf_sysctl_call_tables,
+ },
{ .ctl_name = 0 }
};
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 7d68b24b5654..0fcf6f073064 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -11,8 +11,7 @@
*/
#include <linux/kernel.h>
-#include <linux/rtnetlink.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
#include "br_private.h"
static inline size_t br_nlmsg_size(void)
@@ -110,8 +109,8 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct net_device *dev;
int idx;
- read_lock(&dev_base_lock);
- for (dev = dev_base, idx = 0; dev; dev = dev->next) {
+ idx = 0;
+ for_each_netdev(dev) {
/* not a bridge port */
if (dev->br_port == NULL || idx < cb->args[0])
goto skip;
@@ -123,7 +122,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
skip:
++idx;
}
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
@@ -166,7 +164,7 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return -EINVAL;
/* if kernel STP is running, don't allow changes */
- if (p->br->stp_enabled)
+ if (p->br->stp_enabled == BR_KERNEL_STP)
return -EBUSY;
if (!netif_running(dev) ||
@@ -179,18 +177,19 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
}
-static struct rtnetlink_link bridge_rtnetlink_table[RTM_NR_MSGTYPES] = {
- [RTM_GETLINK - RTM_BASE] = { .dumpit = br_dump_ifinfo, },
- [RTM_SETLINK - RTM_BASE] = { .doit = br_rtm_setlink, },
-};
-
-void __init br_netlink_init(void)
+int __init br_netlink_init(void)
{
- rtnetlink_links[PF_BRIDGE] = bridge_rtnetlink_table;
+ if (__rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo))
+ return -ENOBUFS;
+
+ /* Only the first call to __rtnl_register can fail */
+ __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL);
+
+ return 0;
}
void __exit br_netlink_fini(void)
{
- rtnetlink_links[PF_BRIDGE] = NULL;
+ rtnl_unregister_all(PF_BRIDGE);
}
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 37357ed2149b..c8451d3a070c 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -50,7 +50,6 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
case NETDEV_CHANGEADDR:
spin_lock_bh(&br->lock);
br_fdb_changeaddr(p, dev->dev_addr);
- br_ifinfo_notify(RTM_NEWLINK, p);
br_stp_recalculate_bridge_id(br);
spin_unlock_bh(&br->lock);
break;
@@ -74,10 +73,11 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
break;
case NETDEV_UP:
- spin_lock_bh(&br->lock);
- if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP))
+ if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) {
+ spin_lock_bh(&br->lock);
br_stp_enable_port(p);
- spin_unlock_bh(&br->lock);
+ spin_unlock_bh(&br->lock);
+ }
break;
case NETDEV_UNREGISTER:
@@ -85,5 +85,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
break;
}
+ /* Events that may cause spanning tree to refresh */
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+ event == NETDEV_CHANGE || event == NETDEV_DOWN)
+ br_ifinfo_notify(RTM_NEWLINK, p);
+
return NOTIFY_DONE;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index cc3f1c99261a..21bf3a9a03fd 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -26,7 +26,10 @@
#define BR_PORT_BITS 10
#define BR_MAX_PORTS (1<<BR_PORT_BITS)
-#define BR_VERSION "2.2"
+#define BR_VERSION "2.3"
+
+/* Path to usermode spanning tree program */
+#define BR_STP_PROG "/sbin/bridge-stp"
typedef struct bridge_id bridge_id;
typedef struct mac_addr mac_addr;
@@ -107,7 +110,13 @@ struct net_bridge
u8 group_addr[ETH_ALEN];
u16 root_port;
- unsigned char stp_enabled;
+
+ enum {
+ BR_NO_STP, /* no spanning tree */
+ BR_KERNEL_STP, /* old STP in kernel */
+ BR_USER_STP, /* new RSTP in userspace */
+ } stp_enabled;
+
unsigned char topology_change;
unsigned char topology_change_detected;
@@ -127,14 +136,14 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
return !memcmp(&br->bridge_id, &br->designated_root, 8);
}
-
/* br_device.c */
extern void br_dev_setup(struct net_device *dev);
extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
/* br_fdb.c */
-extern void br_fdb_init(void);
+extern int br_fdb_init(void);
extern void br_fdb_fini(void);
+extern void br_fdb_flush(struct net_bridge *br);
extern void br_fdb_changeaddr(struct net_bridge_port *p,
const unsigned char *newaddr);
extern void br_fdb_cleanup(unsigned long arg);
@@ -182,7 +191,8 @@ extern void br_features_recompute(struct net_bridge *br);
/* br_input.c */
extern int br_handle_frame_finish(struct sk_buff *skb);
-extern int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb);
+extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
+ struct sk_buff *skb);
/* br_ioctl.c */
extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
@@ -207,6 +217,7 @@ extern void br_become_designated_port(struct net_bridge_port *p);
/* br_stp_if.c */
extern void br_stp_enable_bridge(struct net_bridge *br);
extern void br_stp_disable_bridge(struct net_bridge *br);
+extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
extern void br_stp_enable_port(struct net_bridge_port *p);
extern void br_stp_disable_port(struct net_bridge_port *p);
extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
@@ -235,7 +246,7 @@ extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
/* br_netlink.c */
-extern void br_netlink_init(void);
+extern int br_netlink_init(void);
extern void br_netlink_fini(void);
extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f9ff4d57b0d7..0e035d6162cc 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -13,7 +13,6 @@
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
-#include <linux/smp_lock.h>
#include "br_private.h"
#include "br_private_stp.h"
@@ -370,11 +369,11 @@ static void br_make_blocking(struct net_bridge_port *p)
static void br_make_forwarding(struct net_bridge_port *p)
{
if (p->state == BR_STATE_BLOCKING) {
- if (p->br->stp_enabled) {
+ if (p->br->stp_enabled == BR_KERNEL_STP)
p->state = BR_STATE_LISTENING;
- } else {
+ else
p->state = BR_STATE_LEARNING;
- }
+
br_log_state(p);
mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); }
}
@@ -384,6 +383,10 @@ void br_port_state_selection(struct net_bridge *br)
{
struct net_bridge_port *p;
+ /* Don't change port states if userspace is handling STP */
+ if (br->stp_enabled == BR_USER_STP)
+ return;
+
list_for_each_entry(p, &br->port_list, list) {
if (p->state != BR_STATE_DISABLED) {
if (p->port_no == br->root_port) {
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index b9fb0dc4ab12..60112bce6698 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -33,9 +33,6 @@ static void br_send_bpdu(struct net_bridge_port *p,
{
struct sk_buff *skb;
- if (!p->br->stp_enabled)
- return;
-
skb = dev_alloc_skb(length+LLC_RESERVE);
if (!skb)
return;
@@ -75,6 +72,9 @@ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
{
unsigned char buf[35];
+ if (p->br->stp_enabled != BR_KERNEL_STP)
+ return;
+
buf[0] = 0;
buf[1] = 0;
buf[2] = 0;
@@ -117,6 +117,9 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
{
unsigned char buf[4];
+ if (p->br->stp_enabled != BR_KERNEL_STP)
+ return;
+
buf[0] = 0;
buf[1] = 0;
buf[2] = 0;
@@ -157,9 +160,13 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
br = p->br;
spin_lock(&br->lock);
- if (p->state == BR_STATE_DISABLED
- || !br->stp_enabled
- || !(br->dev->flags & IFF_UP))
+ if (br->stp_enabled != BR_KERNEL_STP)
+ goto out;
+
+ if (!(br->dev->flags & IFF_UP))
+ goto out;
+
+ if (p->state == BR_STATE_DISABLED)
goto out;
if (compare_ether_addr(dest, br->group_addr) != 0)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 58d13f2bd121..a786e7863200 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -14,7 +14,6 @@
*/
#include <linux/kernel.h>
-#include <linux/smp_lock.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
@@ -87,7 +86,6 @@ void br_stp_disable_bridge(struct net_bridge *br)
void br_stp_enable_port(struct net_bridge_port *p)
{
br_init_port(p);
- br_ifinfo_notify(RTM_NEWLINK, p);
br_port_state_selection(p->br);
}
@@ -101,8 +99,6 @@ void br_stp_disable_port(struct net_bridge_port *p)
printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
br->dev->name, p->port_no, p->dev->name, "disabled");
- br_ifinfo_notify(RTM_DELLINK, p);
-
wasroot = br_is_root_bridge(br);
br_become_designated_port(p);
p->state = BR_STATE_DISABLED;
@@ -123,10 +119,68 @@ void br_stp_disable_port(struct net_bridge_port *p)
br_become_root_bridge(br);
}
+static void br_stp_start(struct net_bridge *br)
+{
+ int r;
+ char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
+ char *envp[] = { NULL };
+
+ r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
+ if (r == 0) {
+ br->stp_enabled = BR_USER_STP;
+ printk(KERN_INFO "%s: userspace STP started\n", br->dev->name);
+ } else {
+ br->stp_enabled = BR_KERNEL_STP;
+ printk(KERN_INFO "%s: starting userspace STP failed, "
+ "staring kernel STP\n", br->dev->name);
+
+ /* To start timers on any ports left in blocking */
+ spin_lock_bh(&br->lock);
+ br_port_state_selection(br);
+ spin_unlock_bh(&br->lock);
+ }
+}
+
+static void br_stp_stop(struct net_bridge *br)
+{
+ int r;
+ char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
+ char *envp[] = { NULL };
+
+ if (br->stp_enabled == BR_USER_STP) {
+ r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
+ printk(KERN_INFO "%s: userspace STP stopped, return code %d\n",
+ br->dev->name, r);
+
+
+ /* To start timers on any ports left in blocking */
+ spin_lock_bh(&br->lock);
+ br_port_state_selection(br);
+ spin_unlock_bh(&br->lock);
+ }
+
+ br->stp_enabled = BR_NO_STP;
+}
+
+void br_stp_set_enabled(struct net_bridge *br, unsigned long val)
+{
+ ASSERT_RTNL();
+
+ if (val) {
+ if (br->stp_enabled == BR_NO_STP)
+ br_stp_start(br);
+ } else {
+ if (br->stp_enabled != BR_NO_STP)
+ br_stp_stop(br);
+ }
+}
+
/* called under bridge lock */
void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
{
- unsigned char oldaddr[6];
+ /* should be aligned on 2 bytes for compare_ether_addr() */
+ unsigned short oldaddr_aligned[ETH_ALEN >> 1];
+ unsigned char *oldaddr = (unsigned char *)oldaddr_aligned;
struct net_bridge_port *p;
int wasroot;
@@ -151,11 +205,14 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
br_become_root_bridge(br);
}
-static const unsigned char br_mac_zero[6];
+/* should be aligned on 2 bytes for compare_ether_addr() */
+static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1];
/* called under bridge lock */
void br_stp_recalculate_bridge_id(struct net_bridge *br)
{
+ const unsigned char *br_mac_zero =
+ (const unsigned char *)br_mac_zero_aligned;
const unsigned char *addr = br_mac_zero;
struct net_bridge_port *p;
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 030aa798fea7..24e0ca4a3131 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -15,7 +15,6 @@
#include <linux/kernel.h>
#include <linux/times.h>
-#include <linux/smp_lock.h>
#include "br_private.h"
#include "br_private_stp.h"
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 01a22ad0cc75..33c6c4a7c689 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -149,7 +149,11 @@ static ssize_t show_stp_state(struct device *d,
static void set_stp_state(struct net_bridge *br, unsigned long val)
{
- br->stp_enabled = val;
+ rtnl_lock();
+ spin_unlock_bh(&br->lock);
+ br_stp_set_enabled(br, val);
+ spin_lock_bh(&br->lock);
+ rtnl_unlock();
}
static ssize_t store_stp_state(struct device *d,
@@ -309,6 +313,19 @@ static ssize_t store_group_addr(struct device *d,
static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR,
show_group_addr, store_group_addr);
+static ssize_t store_flush(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct net_bridge *br = to_bridge(d);
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ br_fdb_flush(br);
+ return len;
+}
+static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush);
static struct attribute *bridge_attrs[] = {
&dev_attr_forward_delay.attr,
@@ -328,6 +345,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_topology_change_timer.attr,
&dev_attr_gc_timer.attr,
&dev_attr_group_addr.attr,
+ &dev_attr_flush.attr,
NULL
};
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 0bc2aef8f9f3..2da22927d8dd 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -137,6 +137,13 @@ static ssize_t show_hold_timer(struct net_bridge_port *p,
}
static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
+static ssize_t store_flush(struct net_bridge_port *p, unsigned long v)
+{
+ br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry
+ return 0;
+}
+static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+
static struct brport_attribute *brport_attrs[] = {
&brport_attr_path_cost,
&brport_attr_priority,
@@ -152,6 +159,7 @@ static struct brport_attribute *brport_attrs[] = {
&brport_attr_message_age_timer,
&brport_attr_forward_delay_timer,
&brport_attr_hold_timer,
+ &brport_attr_flush,
NULL
};
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 9c599800a900..1a46952a56d9 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -35,40 +35,36 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
return EBT_NOMATCH;
if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) {
- __be32 _addr, *ap;
+ __be32 saddr, daddr, *sap, *dap;
- /* IPv4 addresses are always 4 bytes */
- if (ah->ar_pln != sizeof(__be32))
+ if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP))
+ return EBT_NOMATCH;
+ sap = skb_header_pointer(skb, sizeof(struct arphdr) +
+ ah->ar_hln, sizeof(saddr),
+ &saddr);
+ if (sap == NULL)
+ return EBT_NOMATCH;
+ dap = skb_header_pointer(skb, sizeof(struct arphdr) +
+ 2*ah->ar_hln+sizeof(saddr),
+ sizeof(daddr), &daddr);
+ if (dap == NULL)
+ return EBT_NOMATCH;
+ if (info->bitmask & EBT_ARP_SRC_IP &&
+ FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP))
+ return EBT_NOMATCH;
+ if (info->bitmask & EBT_ARP_DST_IP &&
+ FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP))
+ return EBT_NOMATCH;
+ if (info->bitmask & EBT_ARP_GRAT &&
+ FWINV(*dap != *sap, EBT_ARP_GRAT))
return EBT_NOMATCH;
- if (info->bitmask & EBT_ARP_SRC_IP) {
- ap = skb_header_pointer(skb, sizeof(struct arphdr) +
- ah->ar_hln, sizeof(_addr),
- &_addr);
- if (ap == NULL)
- return EBT_NOMATCH;
- if (FWINV(info->saddr != (*ap & info->smsk),
- EBT_ARP_SRC_IP))
- return EBT_NOMATCH;
- }
-
- if (info->bitmask & EBT_ARP_DST_IP) {
- ap = skb_header_pointer(skb, sizeof(struct arphdr) +
- 2*ah->ar_hln+sizeof(__be32),
- sizeof(_addr), &_addr);
- if (ap == NULL)
- return EBT_NOMATCH;
- if (FWINV(info->daddr != (*ap & info->dmsk),
- EBT_ARP_DST_IP))
- return EBT_NOMATCH;
- }
}
if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
unsigned char _mac[ETH_ALEN], *mp;
uint8_t verdict, i;
- /* MAC addresses are 6 bytes */
- if (ah->ar_hln != ETH_ALEN)
+ if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
return EBT_NOMATCH;
if (info->bitmask & EBT_ARP_SRC_MAC) {
mp = skb_header_pointer(skb, sizeof(struct arphdr),
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 45712aec6a0e..031bfa4a51fc 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -196,14 +196,10 @@ static int __init ebt_log_init(void)
ret = ebt_register_watcher(&log);
if (ret < 0)
return ret;
- if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) {
- printk(KERN_WARNING "ebt_log: not logging via system console "
- "since somebody else already registered for PF_INET\n");
- /* we cannot make module load fail here, since otherwise
- * ebtables userspace would abort */
- }
-
- return 0;
+ ret = nf_log_register(PF_BRIDGE, &ebt_log_logger);
+ if (ret < 0 && ret != -EEXIST)
+ ebt_unregister_watcher(&log);
+ return ret;
}
static void __exit ebt_log_fini(void)
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 8e15cc47f6c0..9411db625917 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -130,6 +130,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
unsigned int group = uloginfo->nlgroup;
ebt_ulog_buff_t *ub = &ulog_buffers[group];
spinlock_t *lock = &ub->lock;
+ ktime_t kt;
if ((uloginfo->cprange == 0) ||
(uloginfo->cprange > skb->len + ETH_HLEN))
@@ -164,9 +165,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
/* Fill in the ulog data */
pm->version = EBT_ULOG_VERSION;
- do_gettimeofday(&pm->stamp);
+ kt = ktime_get_real();
+ pm->stamp = ktime_to_timeval(kt);
if (ub->qlen == 1)
- skb_set_timestamp(ub->skb, &pm->stamp);
+ ub->skb->tstamp = kt;
pm->data_len = copy_len;
pm->mark = skb->mark;
pm->hook = hooknr;
@@ -295,14 +297,12 @@ static int __init ebt_ulog_init(void)
/* initialize ulog_buffers */
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
- init_timer(&ulog_buffers[i].timer);
- ulog_buffers[i].timer.function = ulog_timer;
- ulog_buffers[i].timer.data = i;
+ setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
spin_lock_init(&ulog_buffers[i].lock);
}
ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
- NULL, THIS_MODULE);
+ NULL, NULL, THIS_MODULE);
if (!ebtulognl)
ret = -ENOMEM;
else if ((ret = ebt_register_watcher(&ulog)))
diff --git a/net/compat.c b/net/compat.c
index 1f32866d09b7..9a0f5f2b90c8 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -34,11 +34,11 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
{
int tot_len = 0;
- while(niov > 0) {
+ while (niov > 0) {
compat_uptr_t buf;
compat_size_t len;
- if(get_user(len, &uiov32->iov_len) ||
+ if (get_user(len, &uiov32->iov_len) ||
get_user(buf, &uiov32->iov_base)) {
tot_len = -EFAULT;
break;
@@ -78,12 +78,12 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
{
int tot_len;
- if(kern_msg->msg_namelen) {
- if(mode==VERIFY_READ) {
+ if (kern_msg->msg_namelen) {
+ if (mode==VERIFY_READ) {
int err = move_addr_to_kernel(kern_msg->msg_name,
kern_msg->msg_namelen,
kern_address);
- if(err < 0)
+ if (err < 0)
return err;
}
kern_msg->msg_name = kern_address;
@@ -93,7 +93,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
tot_len = iov_from_user_compat_to_kern(kern_iov,
(struct compat_iovec __user *)kern_msg->msg_iov,
kern_msg->msg_iovlen);
- if(tot_len >= 0)
+ if (tot_len >= 0)
kern_msg->msg_iov = kern_iov;
return tot_len;
@@ -146,8 +146,8 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
kcmlen = 0;
kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf;
ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
- while(ucmsg != NULL) {
- if(get_user(ucmlen, &ucmsg->cmsg_len))
+ while (ucmsg != NULL) {
+ if (get_user(ucmlen, &ucmsg->cmsg_len))
return -EFAULT;
/* Catch bogons. */
@@ -160,7 +160,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
kcmlen += tmp;
ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
}
- if(kcmlen == 0)
+ if (kcmlen == 0)
return -EINVAL;
/* The kcmlen holds the 64-bit version of the control length.
@@ -176,7 +176,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
/* Now copy them over neatly. */
memset(kcmsg, 0, kcmlen);
ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
- while(ucmsg != NULL) {
+ while (ucmsg != NULL) {
if (__get_user(ucmlen, &ucmsg->cmsg_len))
goto Efault;
if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg))
@@ -215,11 +215,12 @@ Efault:
int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
{
struct compat_timeval ctv;
+ struct compat_timespec cts;
struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
struct compat_cmsghdr cmhdr;
int cmlen;
- if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
+ if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
kmsg->msg_flags |= MSG_CTRUNC;
return 0; /* XXX: return error? check spec. */
}
@@ -229,11 +230,18 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
ctv.tv_sec = tv->tv_sec;
ctv.tv_usec = tv->tv_usec;
data = &ctv;
- len = sizeof(struct compat_timeval);
+ len = sizeof(ctv);
+ }
+ if (level == SOL_SOCKET && type == SO_TIMESTAMPNS) {
+ struct timespec *ts = (struct timespec *)data;
+ cts.tv_sec = ts->tv_sec;
+ cts.tv_nsec = ts->tv_nsec;
+ data = &cts;
+ len = sizeof(cts);
}
cmlen = CMSG_COMPAT_LEN(len);
- if(kmsg->msg_controllen < cmlen) {
+ if (kmsg->msg_controllen < cmlen) {
kmsg->msg_flags |= MSG_CTRUNC;
cmlen = kmsg->msg_controllen;
}
@@ -241,9 +249,9 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
cmhdr.cmsg_type = type;
cmhdr.cmsg_len = cmlen;
- if(copy_to_user(cm, &cmhdr, sizeof cmhdr))
+ if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
return -EFAULT;
- if(copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
+ if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
return -EFAULT;
cmlen = CMSG_COMPAT_SPACE(len);
kmsg->msg_control += cmlen;
@@ -545,20 +553,49 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
struct compat_timeval __user *ctv =
(struct compat_timeval __user*) userstamp;
int err = -ENOENT;
+ struct timeval tv;
if (!sock_flag(sk, SOCK_TIMESTAMP))
sock_enable_timestamp(sk);
- if (sk->sk_stamp.tv_sec == -1)
+ tv = ktime_to_timeval(sk->sk_stamp);
+ if (tv.tv_sec == -1)
return err;
- if (sk->sk_stamp.tv_sec == 0)
- do_gettimeofday(&sk->sk_stamp);
- if (put_user(sk->sk_stamp.tv_sec, &ctv->tv_sec) ||
- put_user(sk->sk_stamp.tv_usec, &ctv->tv_usec))
+ if (tv.tv_sec == 0) {
+ sk->sk_stamp = ktime_get_real();
+ tv = ktime_to_timeval(sk->sk_stamp);
+ }
+ err = 0;
+ if (put_user(tv.tv_sec, &ctv->tv_sec) ||
+ put_user(tv.tv_usec, &ctv->tv_usec))
err = -EFAULT;
return err;
}
EXPORT_SYMBOL(compat_sock_get_timestamp);
+/*
+ * compat_sock_get_timestampns - copy sk->sk_stamp to a compat timespec
+ * @sk: socket whose stamp is reported
+ * @userstamp: user buffer, accessed as a struct compat_timespec
+ *
+ * Turns on SOCK_TIMESTAMP for the socket if it is not already set.
+ * Returns -ENOENT when the stored stamp converts to tv_sec == -1,
+ * -EFAULT when either field cannot be written to user space, and 0
+ * otherwise.  A stamp whose tv_sec is 0 is first replaced by the
+ * current real time.
+ */
+int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
+{
+	struct compat_timespec __user *cts;
+	struct timespec stamp;
+
+	cts = (struct compat_timespec __user *)userstamp;
+
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		sock_enable_timestamp(sk);
+
+	stamp = ktime_to_timespec(sk->sk_stamp);
+	if (stamp.tv_sec == -1)
+		return -ENOENT;
+
+	if (stamp.tv_sec == 0) {
+		/* Nothing recorded yet: store and report "now" instead. */
+		sk->sk_stamp = ktime_get_real();
+		stamp = ktime_to_timespec(sk->sk_stamp);
+	}
+
+	if (put_user(stamp.tv_sec, &cts->tv_sec))
+		return -EFAULT;
+	if (put_user(stamp.tv_nsec, &cts->tv_nsec))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(compat_sock_get_timestampns);
+
asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -617,7 +654,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
a0 = a[0];
a1 = a[1];
- switch(call) {
+ switch (call) {
case SYS_SOCKET:
ret = sys_socket(a0, a1, a[2]);
break;
diff --git a/net/core/Makefile b/net/core/Makefile
index 73272d506e93..4751613e1b59 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -13,7 +13,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
obj-$(CONFIG_XFRM) += flow.o
obj-$(CONFIG_SYSFS) += net-sysfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
-obj-$(CONFIG_WIRELESS_EXT) += wireless.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_NET_DMA) += user_dma.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 186212b5b7da..cb056f476126 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -411,11 +411,11 @@ fault:
return -EFAULT;
}
-__sum16 __skb_checksum_complete(struct sk_buff *skb)
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
__sum16 sum;
- sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+ sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
@@ -423,6 +423,12 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
}
return sum;
}
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+/*
+ * Whole-packet variant of __skb_checksum_complete_head(): the length
+ * passed down is the skb's full skb->len.
+ */
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+	const int whole = skb->len;
+
+	return __skb_checksum_complete_head(skb, whole);
+}
EXPORT_SYMBOL(__skb_checksum_complete);
/**
diff --git a/net/core/dev.c b/net/core/dev.c
index cf71614dae93..4317c1be4d3f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -109,7 +109,7 @@
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
-#include <linux/wireless.h>
+#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
@@ -146,8 +146,8 @@
*/
static DEFINE_SPINLOCK(ptype_lock);
-static struct list_head ptype_base[16]; /* 16 way hashed list */
-static struct list_head ptype_all; /* Taps */
+static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */
+static struct list_head ptype_all __read_mostly; /* Taps */
#ifdef CONFIG_NET_DMA
static struct dma_client *net_dma_client;
@@ -156,13 +156,13 @@ static spinlock_t net_dma_event_lock;
#endif
/*
- * The @dev_base list is protected by @dev_base_lock and the rtnl
+ * The @dev_base_head list is protected by @dev_base_lock and the rtnl
* semaphore.
*
* Pure readers hold dev_base_lock for reading.
*
* Writers must hold the rtnl semaphore while they loop through the
- * dev_base list, and hold dev_base_lock for writing when they do the
+ * dev_base_head list, and hold dev_base_lock for writing when they do the
* actual updates. This allows pure readers to access the list even
* while a writer is preparing to update it.
*
@@ -174,11 +174,10 @@ static spinlock_t net_dma_event_lock;
* unregister_netdevice(), which must be called with the rtnl
* semaphore held.
*/
-struct net_device *dev_base;
-static struct net_device **dev_tail = &dev_base;
+LIST_HEAD(dev_base_head);
DEFINE_RWLOCK(dev_base_lock);
-EXPORT_SYMBOL(dev_base);
+EXPORT_SYMBOL(dev_base_head);
EXPORT_SYMBOL(dev_base_lock);
#define NETDEV_HASHBITS 8
@@ -226,12 +225,6 @@ extern void netdev_unregister_sysfs(struct net_device *);
*******************************************************************************/
/*
- * For efficiency
- */
-
-static int netdev_nit;
-
-/*
* Add a protocol ID to the list. Now that the input handler is
* smarter we can dispense with all the messy stuff that used to be
* here.
@@ -265,10 +258,9 @@ void dev_add_pack(struct packet_type *pt)
int hash;
spin_lock_bh(&ptype_lock);
- if (pt->type == htons(ETH_P_ALL)) {
- netdev_nit++;
+ if (pt->type == htons(ETH_P_ALL))
list_add_rcu(&pt->list, &ptype_all);
- } else {
+ else {
hash = ntohs(pt->type) & 15;
list_add_rcu(&pt->list, &ptype_base[hash]);
}
@@ -295,10 +287,9 @@ void __dev_remove_pack(struct packet_type *pt)
spin_lock_bh(&ptype_lock);
- if (pt->type == htons(ETH_P_ALL)) {
- netdev_nit--;
+ if (pt->type == htons(ETH_P_ALL))
head = &ptype_all;
- } else
+ else
head = &ptype_base[ntohs(pt->type) & 15];
list_for_each_entry(pt1, head, list) {
@@ -575,26 +566,38 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
ASSERT_RTNL();
- for (dev = dev_base; dev; dev = dev->next)
+ for_each_netdev(dev)
if (dev->type == type &&
!memcmp(dev->dev_addr, ha, dev->addr_len))
- break;
- return dev;
+ return dev;
+
+ return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr);
+/*
+ * __dev_getfirstbyhwtype - find the first device of a given hardware type
+ * @type: value compared against dev->type
+ *
+ * Walks the device list in for_each_netdev() order and returns the first
+ * match, or NULL when there is none.  ASSERT_RTNL() checks that the
+ * caller holds the rtnl semaphore; no reference is taken on the result.
+ */
+struct net_device *__dev_getfirstbyhwtype(unsigned short type)
+{
+	struct net_device *dev, *found = NULL;
+
+	ASSERT_RTNL();
+
+	for_each_netdev(dev) {
+		if (dev->type == type) {
+			found = dev;
+			break;
+		}
+	}
+
+	return found;
+}
+
+EXPORT_SYMBOL(__dev_getfirstbyhwtype);
+
struct net_device *dev_getfirstbyhwtype(unsigned short type)
{
struct net_device *dev;
rtnl_lock();
- for (dev = dev_base; dev; dev = dev->next) {
- if (dev->type == type) {
- dev_hold(dev);
- break;
- }
- }
+ dev = __dev_getfirstbyhwtype(type);
+ if (dev)
+ dev_hold(dev);
rtnl_unlock();
return dev;
}
@@ -614,17 +617,19 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype);
struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
- struct net_device *dev;
+ struct net_device *dev, *ret;
+ ret = NULL;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if (((dev->flags ^ if_flags) & mask) == 0) {
dev_hold(dev);
+ ret = dev;
break;
}
}
read_unlock(&dev_base_lock);
- return dev;
+ return ret;
}
/**
@@ -690,7 +695,7 @@ int dev_alloc_name(struct net_device *dev, const char *name)
if (!inuse)
return -ENOMEM;
- for (d = dev_base; d; d = d->next) {
+ for_each_netdev(d) {
if (!sscanf(d->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
@@ -751,13 +756,10 @@ int dev_change_name(struct net_device *dev, char *newname)
else
strlcpy(dev->name, newname, IFNAMSIZ);
- err = device_rename(&dev->dev, dev->name);
- if (!err) {
- hlist_del(&dev->name_hlist);
- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGENAME, dev);
- }
+ device_rename(&dev->dev, dev->name);
+ hlist_del(&dev->name_hlist);
+ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+ raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
return err;
}
@@ -820,7 +822,6 @@ static int default_rebuild_header(struct sk_buff *skb)
return 1;
}
-
/**
* dev_open - prepare an interface for use.
* @dev: device to open
@@ -976,7 +977,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
if (!err) {
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
nb->notifier_call(nb, NETDEV_REGISTER, dev);
if (dev->flags & IFF_UP)
@@ -1034,23 +1035,12 @@ void net_disable_timestamp(void)
atomic_dec(&netstamp_needed);
}
-void __net_timestamp(struct sk_buff *skb)
-{
- struct timeval tv;
-
- do_gettimeofday(&tv);
- skb_set_timestamp(skb, &tv);
-}
-EXPORT_SYMBOL(__net_timestamp);
-
static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
- else {
- skb->tstamp.off_sec = 0;
- skb->tstamp.off_usec = 0;
- }
+ else
+ skb->tstamp.tv64 = 0;
}
/*
@@ -1080,18 +1070,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
set by sender, so that the second statement is
just protection against buggy protocols.
*/
- skb2->mac.raw = skb2->data;
+ skb_reset_mac_header(skb2);
- if (skb2->nh.raw < skb2->data ||
- skb2->nh.raw > skb2->tail) {
+ if (skb_network_header(skb2) < skb2->data ||
+ skb2->network_header > skb2->tail) {
if (net_ratelimit())
printk(KERN_CRIT "protocol %04x is "
"buggy, dev %s\n",
skb2->protocol, dev->name);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
}
- skb2->h.raw = skb2->nh.raw;
+ skb2->transport_header = skb2->network_header;
skb2->pkt_type = PACKET_OUTGOING;
ptype->func(skb2, skb->dev, ptype, skb->dev);
}
@@ -1170,7 +1160,7 @@ EXPORT_SYMBOL(netif_device_attach);
int skb_checksum_help(struct sk_buff *skb)
{
__wsum csum;
- int ret = 0, offset = skb->h.raw - skb->data;
+ int ret = 0, offset;
if (skb->ip_summed == CHECKSUM_COMPLETE)
goto out_set_summed;
@@ -1186,15 +1176,16 @@ int skb_checksum_help(struct sk_buff *skb)
goto out;
}
+ offset = skb->csum_start - skb_headroom(skb);
BUG_ON(offset > (int)skb->len);
csum = skb_checksum(skb, offset, skb->len-offset, 0);
- offset = skb->tail - skb->h.raw;
+ offset = skb_headlen(skb) - offset;
BUG_ON(offset <= 0);
BUG_ON(skb->csum_offset + 2 > offset);
- *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
-
+ *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
+ csum_fold(csum);
out_set_summed:
skb->ip_summed = CHECKSUM_NONE;
out:
@@ -1220,11 +1211,11 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
BUG_ON(skb_shinfo(skb)->frag_list);
- skb->mac.raw = skb->data;
- skb->mac_len = skb->nh.raw - skb->data;
+ skb_reset_mac_header(skb);
+ skb->mac_len = skb->network_header - skb->mac_header;
__skb_pull(skb, skb->mac_len);
- if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+ if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
if (skb_header_cloned(skb) &&
(err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
return ERR_PTR(err);
@@ -1238,7 +1229,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
segs = ERR_PTR(err);
if (err || skb_gso_ok(skb, features))
break;
- __skb_push(skb, skb->data - skb->nh.raw);
+ __skb_push(skb, (skb->data -
+ skb_network_header(skb)));
}
segs = ptype->gso_segment(skb, features);
break;
@@ -1246,7 +1238,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
}
rcu_read_unlock();
- __skb_push(skb, skb->data - skb->mac.raw);
+ __skb_push(skb, skb->data - skb_mac_header(skb));
return segs;
}
@@ -1343,7 +1335,7 @@ static int dev_gso_segment(struct sk_buff *skb)
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
if (likely(!skb->next)) {
- if (netdev_nit)
+ if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
if (netif_needs_gso(dev, skb)) {
@@ -1445,12 +1437,16 @@ int dev_queue_xmit(struct sk_buff *skb)
/* If packet is not checksummed and device does not support
* checksumming for this protocol, complete checksumming here.
*/
- if (skb->ip_summed == CHECKSUM_PARTIAL &&
- (!(dev->features & NETIF_F_GEN_CSUM) &&
- (!(dev->features & NETIF_F_IP_CSUM) ||
- skb->protocol != htons(ETH_P_IP))))
- if (skb_checksum_help(skb))
- goto out_kfree_skb;
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ skb_set_transport_header(skb, skb->csum_start -
+ skb_headroom(skb));
+
+ if (!(dev->features & NETIF_F_GEN_CSUM) &&
+ (!(dev->features & NETIF_F_IP_CSUM) ||
+ skb->protocol != htons(ETH_P_IP)))
+ if (skb_checksum_help(skb))
+ goto out_kfree_skb;
+ }
gso:
spin_lock_prefetch(&dev->queue_lock);
@@ -1546,9 +1542,9 @@ out:
Receiver routines
=======================================================================*/
-int netdev_max_backlog = 1000;
-int netdev_budget = 300;
-int weight_p = 64; /* old backlog weight */
+int netdev_max_backlog __read_mostly = 1000;
+int netdev_budget __read_mostly = 300;
+int weight_p __read_mostly = 64; /* old backlog weight */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
@@ -1580,7 +1576,7 @@ int netif_rx(struct sk_buff *skb)
if (netpoll_rx(skb))
return NET_RX_DROP;
- if (!skb->tstamp.off_sec)
+ if (!skb->tstamp.tv64)
net_timestamp(skb);
/*
@@ -1687,40 +1683,46 @@ static void net_tx_action(struct softirq_action *h)
}
}
-static __inline__ int deliver_skb(struct sk_buff *skb,
- struct packet_type *pt_prev,
- struct net_device *orig_dev)
+static inline int deliver_skb(struct sk_buff *skb,
+ struct packet_type *pt_prev,
+ struct net_device *orig_dev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+/* These hooks are defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
-void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
-static __inline__ int handle_bridge(struct sk_buff **pskb,
- struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev)
+/*
+ * If the bridge module is loaded, call the bridging hook.
+ * Returns NULL if the packet was consumed.
+ */
+struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
+ struct sk_buff *skb) __read_mostly;
+static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
+ struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev)
{
struct net_bridge_port *port;
- if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
- (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
- return 0;
+ if (skb->pkt_type == PACKET_LOOPBACK ||
+ (port = rcu_dereference(skb->dev->br_port)) == NULL)
+ return skb;
if (*pt_prev) {
- *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
- return br_handle_frame_hook(port, pskb);
+ return br_handle_frame_hook(port, skb);
}
#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
+#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
#endif
#ifdef CONFIG_NET_CLS_ACT
@@ -1741,8 +1743,8 @@ static int ing_filter(struct sk_buff *skb)
if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
- printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
- skb->input_dev->name, skb->dev->name);
+ printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
+ skb->iif, skb->dev->ifindex);
return TC_ACT_SHOT;
}
@@ -1772,11 +1774,11 @@ int netif_receive_skb(struct sk_buff *skb)
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;
- if (!skb->tstamp.off_sec)
+ if (!skb->tstamp.tv64)
net_timestamp(skb);
- if (!skb->input_dev)
- skb->input_dev = skb->dev;
+ if (!skb->iif)
+ skb->iif = skb->dev->ifindex;
orig_dev = skb_bond(skb);
@@ -1785,8 +1787,9 @@ int netif_receive_skb(struct sk_buff *skb)
__get_cpu_var(netdev_rx_stat).total++;
- skb->h.raw = skb->nh.raw = skb->data;
- skb->mac_len = skb->nh.raw - skb->mac.raw;
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->mac_len = skb->network_header - skb->mac_header;
pt_prev = NULL;
@@ -1826,7 +1829,8 @@ int netif_receive_skb(struct sk_buff *skb)
ncls:
#endif
- if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
+ skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
+ if (!skb)
goto out;
type = skb->protocol;
@@ -2047,7 +2051,7 @@ static int dev_ifconf(char __user *arg)
*/
total = 0;
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
for (i = 0; i < NPROTO; i++) {
if (gifconf_list[i]) {
int done;
@@ -2079,26 +2083,28 @@ static int dev_ifconf(char __user *arg)
* This is invoked by the /proc filesystem handler to display a device
* in detail.
*/
-static __inline__ struct net_device *dev_get_idx(loff_t pos)
+void *dev_seq_start(struct seq_file *seq, loff_t *pos)
{
+ loff_t off;
struct net_device *dev;
- loff_t i;
- for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
+ read_lock(&dev_base_lock);
+ if (!*pos)
+ return SEQ_START_TOKEN;
- return i == pos ? dev : NULL;
-}
+ off = 1;
+ for_each_netdev(dev)
+ if (off++ == *pos)
+ return dev;
-void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-{
- read_lock(&dev_base_lock);
- return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
+ return NULL;
}
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
- return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
+ return v == SEQ_START_TOKEN ?
+ first_net_device() : next_net_device((struct net_device *)v);
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -2108,28 +2114,25 @@ void dev_seq_stop(struct seq_file *seq, void *v)
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
- if (dev->get_stats) {
- struct net_device_stats *stats = dev->get_stats(dev);
-
- seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
- "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
- dev->name, stats->rx_bytes, stats->rx_packets,
- stats->rx_errors,
- stats->rx_dropped + stats->rx_missed_errors,
- stats->rx_fifo_errors,
- stats->rx_length_errors + stats->rx_over_errors +
- stats->rx_crc_errors + stats->rx_frame_errors,
- stats->rx_compressed, stats->multicast,
- stats->tx_bytes, stats->tx_packets,
- stats->tx_errors, stats->tx_dropped,
- stats->tx_fifo_errors, stats->collisions,
- stats->tx_carrier_errors +
- stats->tx_aborted_errors +
- stats->tx_window_errors +
- stats->tx_heartbeat_errors,
- stats->tx_compressed);
- } else
- seq_printf(seq, "%6s: No statistics available.\n", dev->name);
+ struct net_device_stats *stats = dev->get_stats(dev);
+
+ seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+ "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+ dev->name, stats->rx_bytes, stats->rx_packets,
+ stats->rx_errors,
+ stats->rx_dropped + stats->rx_missed_errors,
+ stats->rx_fifo_errors,
+ stats->rx_length_errors + stats->rx_over_errors +
+ stats->rx_crc_errors + stats->rx_frame_errors,
+ stats->rx_compressed, stats->multicast,
+ stats->tx_bytes, stats->tx_packets,
+ stats->tx_errors, stats->tx_dropped,
+ stats->tx_fifo_errors, stats->collisions,
+ stats->tx_carrier_errors +
+ stats->tx_aborted_errors +
+ stats->tx_window_errors +
+ stats->tx_heartbeat_errors,
+ stats->tx_compressed);
}
/*
@@ -2188,7 +2191,7 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations dev_seq_ops = {
+static const struct seq_operations dev_seq_ops = {
.start = dev_seq_start,
.next = dev_seq_next,
.stop = dev_seq_stop,
@@ -2208,7 +2211,7 @@ static const struct file_operations dev_seq_fops = {
.release = seq_release,
};
-static struct seq_operations softnet_seq_ops = {
+static const struct seq_operations softnet_seq_ops = {
.start = softnet_seq_start,
.next = softnet_seq_next,
.stop = softnet_seq_stop,
@@ -2228,12 +2231,135 @@ static const struct file_operations softnet_seq_fops = {
.release = seq_release,
};
-#ifdef CONFIG_WIRELESS_EXT
-extern int wireless_proc_init(void);
-#else
-#define wireless_proc_init() 0
+/*
+ * Return the packet_type at zero-based position @pos, counting the
+ * ptype_all list first and then ptype_base[0..15] in bucket order.
+ * Returns NULL when @pos is past the last entry.
+ */
+static void *ptype_get_idx(loff_t pos)
+{
+	struct packet_type *entry = NULL;
+	loff_t seen = 0;
+	int bucket;
+
+	list_for_each_entry_rcu(entry, &ptype_all, list)
+		if (seen++ == pos)
+			return entry;
+
+	for (bucket = 0; bucket < 16; bucket++)
+		list_for_each_entry_rcu(entry, &ptype_base[bucket], list)
+			if (seen++ == pos)
+				return entry;
+
+	return NULL;
+}
+
+/*
+ * seq_file ->start: enter the RCU read side, then hand back the header
+ * token for position 0 or the (*pos - 1)-th packet_type otherwise.
+ */
+static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	rcu_read_lock();
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	return ptype_get_idx(*pos - 1);
+}
+
+/*
+ * seq_file ->next: advance *pos and return the entry following @v.
+ *
+ * After the header token comes entry 0.  From an ETH_P_ALL entry we
+ * continue along ptype_all and, at its end, fall into ptype_base[0].
+ * From any other entry we continue in its own hash bucket.  Empty or
+ * exhausted buckets are skipped; NULL is returned after bucket 15.
+ */
+static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct packet_type *cur;
+	struct list_head *link;
+	int bucket;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ptype_get_idx(0);
+
+	cur = v;
+	link = cur->list.next;
+
+	if (cur->type != htons(ETH_P_ALL)) {
+		bucket = ntohs(cur->type) & 15;
+	} else if (link != &ptype_all) {
+		/* Still inside the ptype_all list. */
+		return list_entry(link, struct packet_type, list);
+	} else {
+		bucket = 0;
+		link = ptype_base[0].next;
+	}
+
+	/* A link pointing back at its list head means "bucket finished". */
+	for (; link == &ptype_base[bucket]; link = ptype_base[bucket].next)
+		if (++bucket >= 16)
+			return NULL;
+
+	return list_entry(link, struct packet_type, list);
+}
+
+/*
+ * seq_file ->stop: leave the RCU read-side section that
+ * ptype_seq_start() entered.
+ */
+static void ptype_seq_stop(struct seq_file *seq, void *v)
+{
+	rcu_read_unlock();
+}
+
+/*
+ * Print the handler address @sym.  With CONFIG_KALLSYMS, a successful
+ * lookup is printed as "symbol+0xoff", prefixed by ":module:" when a
+ * module name is reported.  Otherwise the raw pointer is printed as
+ * "[%p]".
+ */
+static void ptype_seq_decode(struct seq_file *seq, void *sym)
+{
+#ifdef CONFIG_KALLSYMS
+	unsigned long off = 0, size;
+	char buf[128];
+	char *mod;
+	const char *name;
+
+	name = kallsyms_lookup((unsigned long)sym, &size, &off, &mod, buf);
+	if (name) {
+		/* No module name reported: drop both colons as well. */
+		char *sep = mod ? ":" : "";
+
+		if (!mod)
+			mod = "";
+		seq_printf(seq, "%s%s%s%s+0x%lx", sep, mod, sep, name, off);
+		return;
+	}
#endif
+	seq_printf(seq, "[%p]", sym);
+}
+
+/*
+ * seq_file ->show: emit the column header for the start token, or one
+ * line per packet_type: its type ("ALL" for ETH_P_ALL, otherwise four
+ * hex digits), the bound device name (empty when pt->dev is NULL) and
+ * the decoded handler function.  Always returns 0.
+ */
+static int ptype_seq_show(struct seq_file *seq, void *v)
+{
+	struct packet_type *pt = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Type Device Function\n");
+		return 0;
+	}
+
+	if (pt->type != htons(ETH_P_ALL))
+		seq_printf(seq, "%04x", ntohs(pt->type));
+	else
+		seq_puts(seq, "ALL ");
+
+	seq_printf(seq, " %-8s ", pt->dev ? pt->dev->name : "");
+	ptype_seq_decode(seq, pt->func);
+	seq_putc(seq, '\n');
+
+	return 0;
+}
+
+/* Iterator callbacks for the "ptype" seq_file. */
+static const struct seq_operations ptype_seq_ops = {
+	.start	= ptype_seq_start,
+	.next	= ptype_seq_next,
+	.stop	= ptype_seq_stop,
+	.show	= ptype_seq_show,
+};
+
+/* ->open: attach the ptype iterator to @file via seq_open(). */
+static int ptype_seq_open(struct inode *inode, struct file *file)
+{
+	const struct seq_operations *ops = &ptype_seq_ops;
+
+	return seq_open(file, ops);
+}
+
+/* File operations for the "ptype" entry; reads go through seq_file. */
+static const struct file_operations ptype_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ptype_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+
static int __init dev_proc_init(void)
{
int rc = -ENOMEM;
@@ -2242,12 +2368,17 @@ static int __init dev_proc_init(void)
goto out;
if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
goto out_dev;
- if (wireless_proc_init())
+ if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
+ goto out_dev2;
+
+ if (wext_proc_init())
goto out_softnet;
rc = 0;
out:
return rc;
out_softnet:
+ proc_net_remove("ptype");
+out_dev2:
proc_net_remove("softnet_stat");
out_dev:
proc_net_remove("dev");
@@ -2798,29 +2929,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
ret = -EFAULT;
return ret;
}
-#ifdef CONFIG_WIRELESS_EXT
/* Take care of Wireless Extensions */
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- /* If command is `set a parameter', or
- * `get the encoding parameters', check if
- * the user has the right to do it */
- if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
- || cmd == SIOCGIWENCODEEXT) {
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- }
- dev_load(ifr.ifr_name);
- rtnl_lock();
- /* Follow me in net/core/wireless.c */
- ret = wireless_process_ioctl(&ifr, cmd);
- rtnl_unlock();
- if (IW_IS_GET(cmd) &&
- copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- return ret;
- }
-#endif /* CONFIG_WIRELESS_EXT */
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
+ return wext_handle_ioctl(&ifr, cmd, arg);
return -EINVAL;
}
}
@@ -2850,7 +2961,7 @@ static int dev_boot_phase = 1;
static DEFINE_SPINLOCK(net_todo_list_lock);
static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
-static inline void net_set_todo(struct net_device *dev)
+static void net_set_todo(struct net_device *dev)
{
spin_lock(&net_todo_list_lock);
list_add_tail(&dev->todo_list, &net_todo_list);
@@ -2891,9 +3002,7 @@ int register_netdevice(struct net_device *dev)
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->_xmit_lock);
dev->xmit_lock_owner = -1;
-#ifdef CONFIG_NET_CLS_ACT
spin_lock_init(&dev->ingress_lock);
-#endif
dev->iflink = -1;
@@ -2977,11 +3086,9 @@ int register_netdevice(struct net_device *dev)
set_bit(__LINK_STATE_PRESENT, &dev->state);
- dev->next = NULL;
dev_init_scheduler(dev);
write_lock_bh(&dev_base_lock);
- *dev_tail = dev;
- dev_tail = &dev->next;
+ list_add_tail(&dev->dev_list, &dev_base_head);
hlist_add_head(&dev->name_hlist, head);
hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
dev_hold(dev);
@@ -3005,7 +3112,7 @@ out:
* chain. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
- * This is a wrapper around register_netdev that takes the rtnl semaphore
+ * This is a wrapper around register_netdevice that takes the rtnl semaphore
* and expands the device name if you passed a format string to
* alloc_netdev.
*/
@@ -3160,6 +3267,11 @@ out:
mutex_unlock(&net_todo_run_mutex);
}
+/* get_stats callback that returns the counters embedded in @dev. */
+static struct net_device_stats *internal_stats(struct net_device *dev)
+{
+	struct net_device_stats *stats = &dev->stats;
+
+	return stats;
+}
/**
* alloc_netdev - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -3195,6 +3307,7 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
if (sizeof_priv)
dev->priv = netdev_priv(dev);
+ dev->get_stats = internal_stats;
setup(dev);
strcpy(dev->name, name);
return dev;
@@ -3249,8 +3362,6 @@ void synchronize_net(void)
void unregister_netdevice(struct net_device *dev)
{
- struct net_device *d, **dp;
-
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -3270,19 +3381,11 @@ void unregister_netdevice(struct net_device *dev)
dev_close(dev);
/* And unlink it from device chain. */
- for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
- if (d == dev) {
- write_lock_bh(&dev_base_lock);
- hlist_del(&dev->name_hlist);
- hlist_del(&dev->index_hlist);
- if (dev_tail == &dev->next)
- dev_tail = dp;
- *dp = d->next;
- write_unlock_bh(&dev_base_lock);
- break;
- }
- }
- BUG_ON(!d);
+ write_lock_bh(&dev_base_lock);
+ list_del(&dev->dev_list);
+ hlist_del(&dev->name_hlist);
+ hlist_del(&dev->index_hlist);
+ write_unlock_bh(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERING;
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 56b310c0c860..5a54053386c8 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -223,7 +223,7 @@ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
loff_t off = 0;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if (off++ == *pos)
return dev;
}
@@ -232,9 +232,8 @@ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev = v;
++*pos;
- return dev->next;
+ return next_net_device((struct net_device *)v);
}
static void dev_mc_seq_stop(struct seq_file *seq, void *v)
@@ -264,7 +263,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations dev_mc_seq_ops = {
+static const struct seq_operations dev_mc_seq_ops = {
.start = dev_mc_seq_start,
.next = dev_mc_seq_next,
.stop = dev_mc_seq_stop,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6168edd137dd..8d5e5a09b576 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -836,7 +836,7 @@ int dev_ethtool(struct ifreq *ifr)
return -EPERM;
}
- if(dev->ethtool_ops->begin)
+ if (dev->ethtool_ops->begin)
if ((rc = dev->ethtool_ops->begin(dev)) < 0)
return rc;
@@ -952,7 +952,7 @@ int dev_ethtool(struct ifreq *ifr)
rc = -EOPNOTSUPP;
}
- if(dev->ethtool_ops->complete)
+ if (dev->ethtool_ops->complete)
dev->ethtool_ops->complete(dev);
if (old_features != dev->features)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 215f1bff048f..8c5474e16683 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -44,6 +44,12 @@ static void rules_ops_put(struct fib_rules_ops *ops)
module_put(ops->owner);
}
+static void flush_route_cache(struct fib_rules_ops *ops)
+{
+ if (ops->flush_cache)
+ ops->flush_cache();
+}
+
int fib_rules_register(struct fib_rules_ops *ops)
{
int err = -EEXIST;
@@ -132,10 +138,25 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
rcu_read_lock();
list_for_each_entry_rcu(rule, ops->rules_list, list) {
+jumped:
if (!fib_rule_match(rule, ops, fl, flags))
continue;
- err = ops->action(rule, fl, flags, arg);
+ if (rule->action == FR_ACT_GOTO) {
+ struct fib_rule *target;
+
+ target = rcu_dereference(rule->ctarget);
+ if (target == NULL) {
+ continue;
+ } else {
+ rule = target;
+ goto jumped;
+ }
+ } else if (rule->action == FR_ACT_NOP)
+ continue;
+ else
+ err = ops->action(rule, fl, flags, arg);
+
if (err != -EAGAIN) {
fib_rule_get(rule);
arg->rule = rule;
@@ -143,7 +164,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
}
}
- err = -ENETUNREACH;
+ err = -ESRCH;
out:
rcu_read_unlock();
@@ -152,13 +173,35 @@ out:
EXPORT_SYMBOL_GPL(fib_rules_lookup);
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
+ struct fib_rules_ops *ops)
+{
+ int err = -EINVAL;
+
+ if (frh->src_len)
+ if (tb[FRA_SRC] == NULL ||
+ frh->src_len > (ops->addr_size * 8) ||
+ nla_len(tb[FRA_SRC]) != ops->addr_size)
+ goto errout;
+
+ if (frh->dst_len)
+ if (tb[FRA_DST] == NULL ||
+ frh->dst_len > (ops->addr_size * 8) ||
+ nla_len(tb[FRA_DST]) != ops->addr_size)
+ goto errout;
+
+ err = 0;
+errout:
+ return err;
+}
+
+static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r, *last = NULL;
struct nlattr *tb[FRA_MAX+1];
- int err = -EINVAL;
+ int err = -EINVAL, unresolved = 0;
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
goto errout;
@@ -173,6 +216,10 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (err < 0)
goto errout;
+ err = validate_rulemsg(frh, tb, ops);
+ if (err < 0)
+ goto errout;
+
rule = kzalloc(ops->rule_size, GFP_KERNEL);
if (rule == NULL) {
err = -ENOMEM;
@@ -211,6 +258,28 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (!rule->pref && ops->default_pref)
rule->pref = ops->default_pref();
+ err = -EINVAL;
+ if (tb[FRA_GOTO]) {
+ if (rule->action != FR_ACT_GOTO)
+ goto errout_free;
+
+ rule->target = nla_get_u32(tb[FRA_GOTO]);
+ /* Backward jumps are prohibited to avoid endless loops */
+ if (rule->target <= rule->pref)
+ goto errout_free;
+
+ list_for_each_entry(r, ops->rules_list, list) {
+ if (r->pref == rule->target) {
+ rule->ctarget = r;
+ break;
+ }
+ }
+
+ if (rule->ctarget == NULL)
+ unresolved = 1;
+ } else if (rule->action == FR_ACT_GOTO)
+ goto errout_free;
+
err = ops->configure(rule, skb, nlh, frh, tb);
if (err < 0)
goto errout_free;
@@ -223,12 +292,35 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
fib_rule_get(rule);
+ if (ops->unresolved_rules) {
+ /*
+ * There are unresolved goto rules in the list, check if
+ * any of them are pointing to this new rule.
+ */
+ list_for_each_entry(r, ops->rules_list, list) {
+ if (r->action == FR_ACT_GOTO &&
+ r->target == rule->pref) {
+ BUG_ON(r->ctarget != NULL);
+ rcu_assign_pointer(r->ctarget, rule);
+ if (--ops->unresolved_rules == 0)
+ break;
+ }
+ }
+ }
+
+ if (rule->action == FR_ACT_GOTO)
+ ops->nr_goto_rules++;
+
+ if (unresolved)
+ ops->unresolved_rules++;
+
if (last)
list_add_rcu(&rule->list, &last->list);
else
list_add_rcu(&rule->list, ops->rules_list);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+ flush_route_cache(ops);
rules_ops_put(ops);
return 0;
@@ -239,11 +331,11 @@ errout:
return err;
}
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule;
+ struct fib_rule *rule, *tmp;
struct nlattr *tb[FRA_MAX+1];
int err = -EINVAL;
@@ -260,6 +352,10 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (err < 0)
goto errout;
+ err = validate_rulemsg(frh, tb, ops);
+ if (err < 0)
+ goto errout;
+
list_for_each_entry(rule, ops->rules_list, list) {
if (frh->action && (frh->action != rule->action))
continue;
@@ -292,10 +388,30 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
}
list_del_rcu(&rule->list);
+
+ if (rule->action == FR_ACT_GOTO)
+ ops->nr_goto_rules--;
+
+ /*
+ * Check if this rule is a target to any of them. If so,
+ * disable them. As this operation is eventually very
+ * expensive, it is only performed if goto rules have
+ * actually been added.
+ */
+ if (ops->nr_goto_rules > 0) {
+ list_for_each_entry(tmp, ops->rules_list, list) {
+ if (tmp->ctarget == rule) {
+ rcu_assign_pointer(tmp->ctarget, NULL);
+ ops->unresolved_rules++;
+ }
+ }
+ }
+
synchronize_rcu();
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).pid);
fib_rule_put(rule);
+ flush_route_cache(ops);
rules_ops_put(ops);
return 0;
}
@@ -341,9 +457,16 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
- if (rule->ifname[0])
+ if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+ frh->flags |= FIB_RULE_UNRESOLVED;
+
+ if (rule->ifname[0]) {
NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+ if (rule->ifindex == -1)
+ frh->flags |= FIB_RULE_DEV_DETACHED;
+ }
+
if (rule->pref)
NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
@@ -353,6 +476,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
if (rule->mark_mask || rule->mark)
NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);
+ if (rule->target)
+ NLA_PUT_U32(skb, FRA_GOTO, rule->target);
+
if (ops->fill(rule, skb, nlh, frh) < 0)
goto nla_put_failure;
@@ -363,19 +489,14 @@ nla_put_failure:
return -EMSGSIZE;
}
-int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
+ struct fib_rules_ops *ops)
{
int idx = 0;
struct fib_rule *rule;
- struct fib_rules_ops *ops;
-
- ops = lookup_rules_ops(family);
- if (ops == NULL)
- return -EAFNOSUPPORT;
- rcu_read_lock();
list_for_each_entry(rule, ops->rules_list, list) {
- if (idx < cb->args[0])
+ if (idx < cb->args[1])
goto skip;
if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
@@ -385,14 +506,44 @@ int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
skip:
idx++;
}
- rcu_read_unlock();
- cb->args[0] = idx;
+ cb->args[1] = idx;
rules_ops_put(ops);
return skb->len;
}
-EXPORT_SYMBOL_GPL(fib_rules_dump);
+static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct fib_rules_ops *ops;
+ int idx = 0, family;
+
+ family = rtnl_msg_family(cb->nlh);
+ if (family != AF_UNSPEC) {
+ /* Protocol specific dump request */
+ ops = lookup_rules_ops(family);
+ if (ops == NULL)
+ return -EAFNOSUPPORT;
+
+ return dump_rules(skb, cb, ops);
+ }
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ops, &rules_ops, list) {
+ if (idx < cb->args[0] || !try_module_get(ops->owner))
+ goto skip;
+
+ if (dump_rules(skb, cb, ops) < 0)
+ break;
+
+ cb->args[1] = 0;
+ skip:
+ idx++;
+ }
+ rcu_read_unlock();
+ cb->args[0] = idx;
+
+ return skb->len;
+}
static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
@@ -471,6 +622,10 @@ static struct notifier_block fib_rules_notifier = {
static int __init fib_rules_init(void)
{
+ rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+
return register_netdevice_notifier(&fib_rules_notifier);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 8d185a089c53..bd903aaf7aa7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -42,11 +42,11 @@ static void *__load_pointer(struct sk_buff *skb, int k)
u8 *ptr = NULL;
if (k >= SKF_NET_OFF)
- ptr = skb->nh.raw + k - SKF_NET_OFF;
+ ptr = skb_network_header(skb) + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
- ptr = skb->mac.raw + k - SKF_LL_OFF;
+ ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
- if (ptr >= skb->head && ptr < skb->tail)
+ if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
return ptr;
return NULL;
}
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 259473d0559d..bcc25591d8ac 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -61,7 +61,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
spin_lock_bh(lock);
d->lock = lock;
if (type)
- d->tail = (struct rtattr *) skb->tail;
+ d->tail = (struct rtattr *)skb_tail_pointer(skb);
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
@@ -212,7 +212,7 @@ int
gnet_stats_finish_copy(struct gnet_dump *d)
{
if (d->tail)
- d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
+ d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
if (d->compat_tc_stats)
if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8b45c9d3b249..e3c26a9ccad6 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -79,7 +79,7 @@ static void rfc2863_policy(struct net_device *dev)
case IF_LINK_MODE_DEFAULT:
default:
break;
- };
+ }
dev->operstate = operstate;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3183142c6044..6f3bb73053c2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -140,6 +140,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
+ if (n->parms->neigh_cleanup)
+ n->parms->neigh_cleanup(n);
neigh_release(n);
continue;
}
@@ -211,6 +213,8 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
NEIGH_PRINTK2("neigh %p is stray.\n", n);
}
write_unlock(&n->lock);
+ if (n->parms->neigh_cleanup)
+ n->parms->neigh_cleanup(n);
neigh_release(n);
}
}
@@ -582,9 +586,6 @@ void neigh_destroy(struct neighbour *neigh)
kfree(hh);
}
- if (neigh->parms->neigh_destructor)
- (neigh->parms->neigh_destructor)(neigh);
-
skb_queue_purge(&neigh->arp_queue);
dev_put(neigh->dev);
@@ -675,6 +676,8 @@ static void neigh_periodic_timer(unsigned long arg)
*np = n->next;
n->dead = 1;
write_unlock(&n->lock);
+ if (n->parms->neigh_cleanup)
+ n->parms->neigh_cleanup(n);
neigh_release(n);
continue;
}
@@ -1122,7 +1125,7 @@ int neigh_compat_output(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- __skb_pull(skb, skb->nh.raw - skb->data);
+ __skb_pull(skb, skb_network_offset(skb));
if (dev->hard_header &&
dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
@@ -1144,7 +1147,7 @@ int neigh_resolve_output(struct sk_buff *skb)
if (!dst || !(neigh = dst->neighbour))
goto discard;
- __skb_pull(skb, skb->nh.raw - skb->data);
+ __skb_pull(skb, skb_network_offset(skb));
if (!neigh_event_send(neigh, skb)) {
int err;
@@ -1187,7 +1190,7 @@ int neigh_connected_output(struct sk_buff *skb)
struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
- __skb_pull(skb, skb->nh.raw - skb->data);
+ __skb_pull(skb, skb_network_offset(skb));
read_lock_bh(&neigh->lock);
err = dev->hard_header(skb, dev, ntohs(skb->protocol),
@@ -1328,6 +1331,8 @@ void neigh_parms_destroy(struct neigh_parms *parms)
kfree(parms);
}
+static struct lock_class_key neigh_table_proxy_queue_class;
+
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
unsigned long now = jiffies;
@@ -1376,7 +1381,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
init_timer(&tbl->proxy_timer);
tbl->proxy_timer.data = (unsigned long)tbl;
tbl->proxy_timer.function = neigh_proxy_process;
- skb_queue_head_init(&tbl->proxy_queue);
+ skb_queue_head_init_class(&tbl->proxy_queue,
+ &neigh_table_proxy_queue_class);
tbl->last_flush = now;
tbl->last_rand = now + tbl->parms.reachable_time * 20;
@@ -1435,7 +1441,7 @@ int neigh_table_clear(struct neigh_table *tbl)
return 0;
}
-int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct ndmsg *ndm;
struct nlattr *dst_attr;
@@ -1500,7 +1506,7 @@ out:
return err;
}
-int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
@@ -1780,7 +1786,7 @@ static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
};
-int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct neigh_table *tbl;
struct ndtmsg *ndtmsg;
@@ -1904,7 +1910,7 @@ errout:
return err;
}
-int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
int family, tidx, nidx = 0;
int tbl_skip = cb->args[0];
@@ -2028,7 +2034,7 @@ out:
return rc;
}
-int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
struct neigh_table *tbl;
int t, family, s_t;
@@ -2088,8 +2094,11 @@ void __neigh_for_each_release(struct neigh_table *tbl,
} else
np = &n->next;
write_unlock(&n->lock);
- if (release)
+ if (release) {
+ if (n->parms->neigh_cleanup)
+ n->parms->neigh_cleanup(n);
neigh_release(n);
+ }
}
}
}
@@ -2384,7 +2393,7 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations neigh_stat_seq_ops = {
+static const struct seq_operations neigh_stat_seq_ops = {
.start = neigh_stat_seq_start,
.next = neigh_stat_seq_next,
.stop = neigh_stat_seq_stop,
@@ -2737,14 +2746,26 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
#endif /* CONFIG_SYSCTL */
+static int __init neigh_init(void)
+{
+ rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
+
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
+ rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
+
+ return 0;
+}
+
+subsys_initcall(neigh_init);
+
EXPORT_SYMBOL(__neigh_event_send);
EXPORT_SYMBOL(neigh_changeaddr);
EXPORT_SYMBOL(neigh_compat_output);
EXPORT_SYMBOL(neigh_connected_output);
EXPORT_SYMBOL(neigh_create);
-EXPORT_SYMBOL(neigh_delete);
EXPORT_SYMBOL(neigh_destroy);
-EXPORT_SYMBOL(neigh_dump_info);
EXPORT_SYMBOL(neigh_event_ns);
EXPORT_SYMBOL(neigh_ifdown);
EXPORT_SYMBOL(neigh_lookup);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4cbb1290a6a3..b21307b15b82 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -352,8 +352,8 @@ static ssize_t wireless_show(struct device *d, char *buf,
read_lock(&dev_base_lock);
if (dev_isalive(dev)) {
- if(dev->wireless_handlers &&
- dev->wireless_handlers->get_wireless_stats)
+ if (dev->wireless_handlers &&
+ dev->wireless_handlers->get_wireless_stats)
iw = dev->wireless_handlers->get_wireless_stats(dev);
if (iw != NULL)
ret = (*format)(iw, buf);
@@ -412,20 +412,25 @@ static int netdev_uevent(struct device *d, char **envp,
int num_envp, char *buf, int size)
{
struct net_device *dev = to_net_dev(d);
- int i = 0;
- int n;
+ int retval, len = 0, i = 0;
/* pass interface to uevent. */
- envp[i++] = buf;
- n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
- buf += n;
- size -= n;
-
- if ((size <= 0) || (i >= num_envp))
- return -ENOMEM;
-
+ retval = add_uevent_var(envp, num_envp, &i,
+ buf, size, &len,
+ "INTERFACE=%s", dev->name);
+ if (retval)
+ goto exit;
+
+ /* pass ifindex to uevent.
+ * ifindex is useful as it won't change (interface name may change)
+ * and is what RtNetlink uses natively. */
+ retval = add_uevent_var(envp, num_envp, &i,
+ buf, size, &len,
+ "IFINDEX=%d", dev->ifindex);
+
+exit:
envp[i] = NULL;
- return 0;
+ return retval;
}
#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index da1019451ccb..758dafe284c0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -9,7 +9,6 @@
* Copyright (C) 2002 Red Hat, Inc.
*/
-#include <linux/smp_lock.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
@@ -86,7 +85,7 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
{
__wsum psum;
- if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
+ if (uh->check == 0 || skb_csum_unnecessary(skb))
return 0;
psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
@@ -293,10 +292,12 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
if (!skb)
return;
- memcpy(skb->data, msg, len);
+ skb_copy_to_linear_data(skb, msg, len);
skb->len += len;
- skb->h.uh = udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
+ skb_push(skb, sizeof(*udph));
+ skb_reset_transport_header(skb);
+ udph = udp_hdr(skb);
udph->source = htons(np->local_port);
udph->dest = htons(np->remote_port);
udph->len = htons(udp_len);
@@ -308,7 +309,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
- skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
+ skb_push(skb, sizeof(*iph));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
/* iph->version = 4; iph->ihl = 5; */
put_unaligned(0x45, (unsigned char *)iph);
@@ -324,7 +327,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb->protocol = eth->h_proto = htons(ETH_P_IP);
memcpy(eth->h_source, np->local_mac, 6);
memcpy(eth->h_dest, np->remote_mac, 6);
@@ -359,8 +362,9 @@ static void arp_reply(struct sk_buff *skb)
(2 * sizeof(u32)))))
return;
- skb->h.raw = skb->nh.raw = skb->data;
- arp = skb->nh.arph;
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ arp = arp_hdr(skb);
if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
@@ -389,7 +393,7 @@ static void arp_reply(struct sk_buff *skb)
if (!send_skb)
return;
- send_skb->nh.raw = send_skb->data;
+ skb_reset_network_header(send_skb);
arp = (struct arphdr *) skb_put(send_skb, size);
send_skb->dev = skb->dev;
send_skb->protocol = htons(ETH_P_ARP);
@@ -443,7 +447,7 @@ int __netpoll_rx(struct sk_buff *skb)
goto out;
/* check if netpoll clients need ARP */
- if (skb->protocol == __constant_htons(ETH_P_ARP) &&
+ if (skb->protocol == htons(ETH_P_ARP) &&
atomic_read(&trapped)) {
skb_queue_tail(&npi->arp_tx, skb);
return 1;
@@ -471,6 +475,13 @@ int __netpoll_rx(struct sk_buff *skb)
if (skb->len < len || len < iph->ihl*4)
goto out;
+ /*
+ * Our transport medium may have padded the buffer out.
+ * Now We trim to the true length of the frame.
+ */
+ if (pskb_trim_rcsum(skb, len))
+ goto out;
+
if (iph->protocol != IPPROTO_UDP)
goto out;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 74a9a32b906d..9cd3a1cb60ef 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -117,7 +117,6 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
-#include <linux/smp_lock.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -129,6 +128,7 @@
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
+#include <linux/freezer.h>
#include <linux/delay.h>
#include <linux/timer.h>
#include <linux/list.h>
@@ -163,14 +163,11 @@
#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n"
-/* #define PG_DEBUG(a) a */
-#define PG_DEBUG(a)
-
/* The buckets are exponential in 'width' */
#define LAT_BUCKETS_MAX 32
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
-#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100)
+#define MPLS_STACK_BOTTOM htonl(0x00000100)
/* Device flag bits */
#define F_IPSRC_RND (1<<0) /* IP-Src Random */
@@ -213,15 +210,11 @@ struct flow_state {
};
struct pktgen_dev {
-
/*
* Try to keep frequent/infrequent used vars. separated.
*/
-
- char ifname[IFNAMSIZ];
- char result[512];
-
- struct pktgen_thread *pg_thread; /* the owner */
+ struct proc_dir_entry *entry; /* proc file */
+ struct pktgen_thread *pg_thread;/* the owner */
struct list_head list; /* Used for chaining in the thread's run-queue */
int running; /* if this changes to false, the test will stop */
@@ -348,6 +341,8 @@ struct pktgen_dev {
unsigned cflows; /* Concurrent flows (config) */
unsigned lflow; /* Flow length (config) */
unsigned nflows; /* accumulated flows (stats) */
+
+ char result[512];
};
struct pktgen_hdr {
@@ -467,17 +462,6 @@ static inline __u64 pg_div64(__u64 n, __u64 base)
return tmp;
}
-static inline u32 pktgen_random(void)
-{
-#if 0
- __u32 n;
- get_random_bytes(&n, 4);
- return n;
-#else
- return net_random();
-#endif
-}
-
static inline __u64 getCurMs(void)
{
struct timeval tv;
@@ -511,7 +495,7 @@ static void pktgen_stop_all_threads_ifs(void);
static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
static void pktgen_stop(struct pktgen_thread *t);
static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
-static int pktgen_mark_device(const char *ifname);
+
static unsigned int scan_ip6(const char *s, char ip[16]);
static unsigned int fmt_ip6(char *s, const char ip[16]);
@@ -605,7 +589,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
" frags: %d delay: %u clone_skb: %d ifname: %s\n",
pkt_dev->nfrags,
1000 * pkt_dev->delay_us + pkt_dev->delay_ns,
- pkt_dev->clone_skb, pkt_dev->ifname);
+ pkt_dev->clone_skb, pkt_dev->odev->name);
seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows,
pkt_dev->lflow);
@@ -660,7 +644,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->nr_labels) {
unsigned i;
seq_printf(seq, " mpls: ");
- for(i = 0; i < pkt_dev->nr_labels; i++)
+ for (i = 0; i < pkt_dev->nr_labels; i++)
seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
i == pkt_dev->nr_labels-1 ? "\n" : ", ");
}
@@ -765,7 +749,7 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, __u32
int i = 0;
*num = 0;
- for(; i < maxlen; i++) {
+ for (; i < maxlen; i++) {
char c;
*num <<= 4;
if (get_user(c, &user_buffer[i]))
@@ -801,7 +785,7 @@ static int count_trail_chars(const char __user * user_buffer,
break;
default:
goto done;
- };
+ }
}
done:
return i;
@@ -844,7 +828,7 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen)
break;
default:
break;
- };
+ }
}
done_str:
return i;
@@ -873,7 +857,7 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
n++;
if (n >= MAX_MPLS_LABELS)
return -E2BIG;
- } while(c == ',');
+ } while (c == ',');
pkt_dev->nr_labels = n;
return i;
@@ -1502,7 +1486,7 @@ static ssize_t pktgen_if_write(struct file *file,
if (len < 0) { return len; }
i += len;
offset = sprintf(pg_result, "OK: mpls=");
- for(n = 0; n < pkt_dev->nr_labels; n++)
+ for (n = 0; n < pkt_dev->nr_labels; n++)
offset += sprintf(pg_result + offset,
"%08x%s", ntohl(pkt_dev->labels[n]),
n == pkt_dev->nr_labels-1 ? "" : ",");
@@ -1696,13 +1680,13 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list)
if (pkt_dev->running)
- seq_printf(seq, "%s ", pkt_dev->ifname);
+ seq_printf(seq, "%s ", pkt_dev->odev->name);
seq_printf(seq, "\nStopped: ");
list_for_each_entry(pkt_dev, &t->if_list, list)
if (!pkt_dev->running)
- seq_printf(seq, "%s ", pkt_dev->ifname);
+ seq_printf(seq, "%s ", pkt_dev->odev->name);
if (t->result[0])
seq_printf(seq, "\nResult: %s\n", t->result);
@@ -1848,16 +1832,14 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
/*
* mark a device for removal
*/
-static int pktgen_mark_device(const char *ifname)
+static void pktgen_mark_device(const char *ifname)
{
struct pktgen_dev *pkt_dev = NULL;
const int max_tries = 10, msec_per_try = 125;
int i = 0;
- int ret = 0;
mutex_lock(&pktgen_thread_lock);
- PG_DEBUG(printk("pktgen: pktgen_mark_device marking %s for removal\n",
- ifname));
+ pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname);
while (1) {
@@ -1866,8 +1848,8 @@ static int pktgen_mark_device(const char *ifname)
break; /* success */
mutex_unlock(&pktgen_thread_lock);
- PG_DEBUG(printk("pktgen: pktgen_mark_device waiting for %s "
- "to disappear....\n", ifname));
+ pr_debug("pktgen: pktgen_mark_device waiting for %s "
+ "to disappear....\n", ifname);
schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
mutex_lock(&pktgen_thread_lock);
@@ -1875,79 +1857,91 @@ static int pktgen_mark_device(const char *ifname)
printk("pktgen_mark_device: timed out after waiting "
"%d msec for device %s to be removed\n",
msec_per_try * i, ifname);
- ret = 1;
break;
}
}
mutex_unlock(&pktgen_thread_lock);
+}
- return ret;
+static void pktgen_change_name(struct net_device *dev)
+{
+ struct pktgen_thread *t;
+
+ list_for_each_entry(t, &pktgen_threads, th_list) {
+ struct pktgen_dev *pkt_dev;
+
+ list_for_each_entry(pkt_dev, &t->if_list, list) {
+ if (pkt_dev->odev != dev)
+ continue;
+
+ remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
+
+ pkt_dev->entry = create_proc_entry(dev->name, 0600,
+ pg_proc_dir);
+ if (!pkt_dev->entry)
+ printk(KERN_ERR "pktgen: can't move proc "
+ " entry for '%s'\n", dev->name);
+ break;
+ }
+ }
}
static int pktgen_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *dev = (struct net_device *)(ptr);
+ struct net_device *dev = ptr;
/* It is OK that we do not hold the group lock right now,
* as we run under the RTNL lock.
*/
switch (event) {
- case NETDEV_CHANGEADDR:
- case NETDEV_GOING_DOWN:
- case NETDEV_DOWN:
- case NETDEV_UP:
- /* Ignore for now */
+ case NETDEV_CHANGENAME:
+ pktgen_change_name(dev);
break;
case NETDEV_UNREGISTER:
pktgen_mark_device(dev->name);
break;
- };
+ }
return NOTIFY_DONE;
}
/* Associate pktgen_dev with a device. */
-static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev)
+static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
{
struct net_device *odev;
+ int err;
/* Clean old setups */
-
if (pkt_dev->odev) {
dev_put(pkt_dev->odev);
pkt_dev->odev = NULL;
}
- odev = dev_get_by_name(pkt_dev->ifname);
-
+ odev = dev_get_by_name(ifname);
if (!odev) {
- printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname);
- goto out;
+ printk("pktgen: no such netdevice: \"%s\"\n", ifname);
+ return -ENODEV;
}
+
if (odev->type != ARPHRD_ETHER) {
- printk("pktgen: not an ethernet device: \"%s\"\n",
- pkt_dev->ifname);
- goto out_put;
- }
- if (!netif_running(odev)) {
- printk("pktgen: device is down: \"%s\"\n", pkt_dev->ifname);
- goto out_put;
+ printk("pktgen: not an ethernet device: \"%s\"\n", ifname);
+ err = -EINVAL;
+ } else if (!netif_running(odev)) {
+ printk("pktgen: device is down: \"%s\"\n", ifname);
+ err = -ENETDOWN;
+ } else {
+ pkt_dev->odev = odev;
+ return 0;
}
- pkt_dev->odev = odev;
-
- return pkt_dev->odev;
-out_put:
dev_put(odev);
-out:
- return NULL;
-
+ return err;
}
/* Read pkt_dev from the interface and set up internal pktgen_dev
@@ -1955,10 +1949,6 @@ out:
*/
static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
{
- /* Try once more, just in case it works now. */
- if (!pkt_dev->odev)
- pktgen_setup_dev(pkt_dev);
-
if (!pkt_dev->odev) {
printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n");
sprintf(pkt_dev->result,
@@ -2095,7 +2085,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
int flow = 0;
if (pkt_dev->cflows) {
- flow = pktgen_random() % pkt_dev->cflows;
+ flow = random32() % pkt_dev->cflows;
if (pkt_dev->flows[flow].count > pkt_dev->lflow)
pkt_dev->flows[flow].count = 0;
@@ -2107,7 +2097,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACSRC_RND)
- mc = pktgen_random() % (pkt_dev->src_mac_count);
+ mc = random32() % pkt_dev->src_mac_count;
else {
mc = pkt_dev->cur_src_mac_offset++;
if (pkt_dev->cur_src_mac_offset >
@@ -2133,7 +2123,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACDST_RND)
- mc = pktgen_random() % (pkt_dev->dst_mac_count);
+ mc = random32() % pkt_dev->dst_mac_count;
else {
mc = pkt_dev->cur_dst_mac_offset++;
@@ -2157,27 +2147,26 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->flags & F_MPLS_RND) {
unsigned i;
- for(i = 0; i < pkt_dev->nr_labels; i++)
+ for (i = 0; i < pkt_dev->nr_labels; i++)
if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
- ((__force __be32)pktgen_random() &
+ ((__force __be32)random32() &
htonl(0x000fffff));
}
if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
- pkt_dev->vlan_id = pktgen_random() % 4096;
+ pkt_dev->vlan_id = random32() & (4096-1);
}
if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
- pkt_dev->svlan_id = pktgen_random() % 4096;
+ pkt_dev->svlan_id = random32() & (4096 - 1);
}
if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
if (pkt_dev->flags & F_UDPSRC_RND)
- pkt_dev->cur_udp_src =
- ((pktgen_random() %
- (pkt_dev->udp_src_max - pkt_dev->udp_src_min)) +
- pkt_dev->udp_src_min);
+ pkt_dev->cur_udp_src = random32() %
+ (pkt_dev->udp_src_max - pkt_dev->udp_src_min)
+ + pkt_dev->udp_src_min;
else {
pkt_dev->cur_udp_src++;
@@ -2188,10 +2177,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
if (pkt_dev->flags & F_UDPDST_RND) {
- pkt_dev->cur_udp_dst =
- ((pktgen_random() %
- (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)) +
- pkt_dev->udp_dst_min);
+ pkt_dev->cur_udp_dst = random32() %
+ (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
+ + pkt_dev->udp_dst_min;
} else {
pkt_dev->cur_udp_dst++;
if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max)
@@ -2206,7 +2194,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
saddr_max))) {
__u32 t;
if (pkt_dev->flags & F_IPSRC_RND)
- t = ((pktgen_random() % (imx - imn)) + imn);
+ t = random32() % (imx - imn) + imn;
else {
t = ntohl(pkt_dev->cur_saddr);
t++;
@@ -2227,14 +2215,13 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__be32 s;
if (pkt_dev->flags & F_IPDST_RND) {
- t = pktgen_random() % (imx - imn) + imn;
+ t = random32() % (imx - imn) + imn;
s = htonl(t);
while (LOOPBACK(s) || MULTICAST(s)
|| BADCLASS(s) || ZERONET(s)
|| LOCAL_MCAST(s)) {
- t = (pktgen_random() %
- (imx - imn)) + imn;
+ t = random32() % (imx - imn) + imn;
s = htonl(t);
}
pkt_dev->cur_daddr = s;
@@ -2266,7 +2253,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for (i = 0; i < 4; i++) {
pkt_dev->cur_in6_daddr.s6_addr32[i] =
- (((__force __be32)pktgen_random() |
+ (((__force __be32)random32() |
pkt_dev->min_in6_daddr.s6_addr32[i]) &
pkt_dev->max_in6_daddr.s6_addr32[i]);
}
@@ -2276,9 +2263,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
__u32 t;
if (pkt_dev->flags & F_TXSIZE_RND) {
- t = ((pktgen_random() %
- (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size))
- + pkt_dev->min_pkt_size);
+ t = random32() %
+ (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
+ + pkt_dev->min_pkt_size;
} else {
t = pkt_dev->cur_pkt_size + 1;
if (t > pkt_dev->max_pkt_size)
@@ -2293,7 +2280,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
{
unsigned i;
- for(i = 0; i < pkt_dev->nr_labels; i++) {
+ for (i = 0; i < pkt_dev->nr_labels; i++) {
*mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM;
}
mpls--;
@@ -2315,7 +2302,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
int datalen, iplen;
struct iphdr *iph;
struct pktgen_hdr *pgh = NULL;
- __be16 protocol = __constant_htons(ETH_P_IP);
+ __be16 protocol = htons(ETH_P_IP);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
__be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
@@ -2324,10 +2311,10 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
if (pkt_dev->nr_labels)
- protocol = __constant_htons(ETH_P_MPLS_UC);
+ protocol = htons(ETH_P_MPLS_UC);
if (pkt_dev->vlan_id != 0xffff)
- protocol = __constant_htons(ETH_P_8021Q);
+ protocol = htons(ETH_P_8021Q);
/* Update any of the values, used when we're incrementing various
* fields.
@@ -2353,24 +2340,28 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
mpls_push(mpls, pkt_dev);
if (pkt_dev->vlan_id != 0xffff) {
- if(pkt_dev->svlan_id != 0xffff) {
+ if (pkt_dev->svlan_id != 0xffff) {
svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
*svlan_tci = build_tci(pkt_dev->svlan_id,
pkt_dev->svlan_cfi,
pkt_dev->svlan_p);
svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
- *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+ *svlan_encapsulated_proto = htons(ETH_P_8021Q);
}
vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
*vlan_tci = build_tci(pkt_dev->vlan_id,
pkt_dev->vlan_cfi,
pkt_dev->vlan_p);
vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
- *vlan_encapsulated_proto = __constant_htons(ETH_P_IP);
+ *vlan_encapsulated_proto = htons(ETH_P_IP);
}
- iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
- udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+ skb->network_header = skb->tail;
+ skb->transport_header = skb->network_header + sizeof(struct iphdr);
+ skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
+
+ iph = ip_hdr(skb);
+ udph = udp_hdr(skb);
memcpy(eth, pkt_dev->hh, 12);
*(__be16 *) & eth[12] = protocol;
@@ -2399,12 +2390,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
iph->check = 0;
iph->check = ip_fast_csum((void *)iph, iph->ihl);
skb->protocol = protocol;
- skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
- VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+ skb->mac_header = (skb->network_header - ETH_HLEN -
+ pkt_dev->nr_labels * sizeof(u32) -
+ VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
- skb->nh.iph = iph;
- skb->h.uh = udph;
if (pkt_dev->nfrags <= 0)
pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2653,7 +2643,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
int datalen;
struct ipv6hdr *iph;
struct pktgen_hdr *pgh = NULL;
- __be16 protocol = __constant_htons(ETH_P_IPV6);
+ __be16 protocol = htons(ETH_P_IPV6);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
__be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
@@ -2661,10 +2651,10 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
if (pkt_dev->nr_labels)
- protocol = __constant_htons(ETH_P_MPLS_UC);
+ protocol = htons(ETH_P_MPLS_UC);
if (pkt_dev->vlan_id != 0xffff)
- protocol = __constant_htons(ETH_P_8021Q);
+ protocol = htons(ETH_P_8021Q);
/* Update any of the values, used when we're incrementing various
* fields.
@@ -2689,24 +2679,28 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
mpls_push(mpls, pkt_dev);
if (pkt_dev->vlan_id != 0xffff) {
- if(pkt_dev->svlan_id != 0xffff) {
+ if (pkt_dev->svlan_id != 0xffff) {
svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
*svlan_tci = build_tci(pkt_dev->svlan_id,
pkt_dev->svlan_cfi,
pkt_dev->svlan_p);
svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
- *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+ *svlan_encapsulated_proto = htons(ETH_P_8021Q);
}
vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
*vlan_tci = build_tci(pkt_dev->vlan_id,
pkt_dev->vlan_cfi,
pkt_dev->vlan_p);
vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
- *vlan_encapsulated_proto = __constant_htons(ETH_P_IPV6);
+ *vlan_encapsulated_proto = htons(ETH_P_IPV6);
}
- iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
- udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+ skb->network_header = skb->tail;
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+ skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
+
+ iph = ipv6_hdr(skb);
+ udph = udp_hdr(skb);
memcpy(eth, pkt_dev->hh, 12);
*(__be16 *) & eth[12] = protocol;
@@ -2728,7 +2722,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
udph->len = htons(datalen + sizeof(struct udphdr));
udph->check = 0; /* No checksum */
- *(__be32 *) iph = __constant_htonl(0x60000000); /* Version + flow */
+ *(__be32 *) iph = htonl(0x60000000); /* Version + flow */
if (pkt_dev->traffic_class) {
/* Version + traffic class + flow (0) */
@@ -2743,13 +2737,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
- skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
- VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+ skb->mac_header = (skb->network_header - ETH_HLEN -
+ pkt_dev->nr_labels * sizeof(u32) -
+ VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
skb->protocol = protocol;
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
- skb->nh.ipv6h = iph;
- skb->h.uh = udph;
if (pkt_dev->nfrags <= 0)
pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2847,7 +2840,7 @@ static void pktgen_run(struct pktgen_thread *t)
struct pktgen_dev *pkt_dev;
int started = 0;
- PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t));
+ pr_debug("pktgen: entering pktgen_run. %p\n", t);
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -2879,7 +2872,7 @@ static void pktgen_stop_all_threads_ifs(void)
{
struct pktgen_thread *t;
- PG_DEBUG(printk("pktgen: entering pktgen_stop_all_threads_ifs.\n"));
+ pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n");
mutex_lock(&pktgen_thread_lock);
@@ -2947,7 +2940,7 @@ static void pktgen_run_all_threads(void)
{
struct pktgen_thread *t;
- PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n"));
+ pr_debug("pktgen: entering pktgen_run_all_threads.\n");
mutex_lock(&pktgen_thread_lock);
@@ -3005,7 +2998,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
if (!pkt_dev->running) {
printk("pktgen: interface: %s is already stopped\n",
- pkt_dev->ifname);
+ pkt_dev->odev->name);
return -EINVAL;
}
@@ -3039,7 +3032,7 @@ static void pktgen_stop(struct pktgen_thread *t)
{
struct pktgen_dev *pkt_dev;
- PG_DEBUG(printk("pktgen: entering pktgen_stop\n"));
+ pr_debug("pktgen: entering pktgen_stop\n");
if_lock(t);
@@ -3063,7 +3056,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
struct list_head *q, *n;
struct pktgen_dev *cur;
- PG_DEBUG(printk("pktgen: entering pktgen_rem_one_if\n"));
+ pr_debug("pktgen: entering pktgen_rem_one_if\n");
if_lock(t);
@@ -3092,7 +3085,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
/* Remove all devices, free mem */
- PG_DEBUG(printk("pktgen: entering pktgen_rem_all_ifs\n"));
+ pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
if_lock(t);
list_for_each_safe(q, n, &t->if_list) {
@@ -3275,7 +3268,7 @@ static int pktgen_thread_worker(void *arg)
t->pid = current->pid;
- PG_DEBUG(printk("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid));
+ pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid);
max_before_softirq = t->max_before_softirq;
@@ -3333,16 +3326,18 @@ static int pktgen_thread_worker(void *arg)
t->control &= ~(T_REMDEV);
}
+ try_to_freeze();
+
set_current_state(TASK_INTERRUPTIBLE);
}
- PG_DEBUG(printk("pktgen: %s stopping all device\n", t->tsk->comm));
+ pr_debug("pktgen: %s stopping all device\n", t->tsk->comm);
pktgen_stop(t);
- PG_DEBUG(printk("pktgen: %s removing all device\n", t->tsk->comm));
+ pr_debug("pktgen: %s removing all device\n", t->tsk->comm);
pktgen_rem_all_ifs(t);
- PG_DEBUG(printk("pktgen: %s removing thread.\n", t->tsk->comm));
+ pr_debug("pktgen: %s removing thread.\n", t->tsk->comm);
pktgen_rem_thread(t);
return 0;
@@ -3355,13 +3350,13 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
if_lock(t);
list_for_each_entry(p, &t->if_list, list)
- if (strncmp(p->ifname, ifname, IFNAMSIZ) == 0) {
+ if (strncmp(p->odev->name, ifname, IFNAMSIZ) == 0) {
pkt_dev = p;
break;
}
if_unlock(t);
- PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev));
+ pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev);
return pkt_dev;
}
@@ -3396,7 +3391,7 @@ out:
static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
{
struct pktgen_dev *pkt_dev;
- struct proc_dir_entry *pe;
+ int err;
/* We don't allow a device to be on several threads */
@@ -3438,29 +3433,28 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->svlan_cfi = 0;
pkt_dev->svlan_id = 0xffff;
- strncpy(pkt_dev->ifname, ifname, IFNAMSIZ);
+ err = pktgen_setup_dev(pkt_dev, ifname);
+ if (err)
+ goto out1;
- if (!pktgen_setup_dev(pkt_dev)) {
- printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
- if (pkt_dev->flows)
- vfree(pkt_dev->flows);
- kfree(pkt_dev);
- return -ENODEV;
- }
-
- pe = create_proc_entry(ifname, 0600, pg_proc_dir);
- if (!pe) {
+ pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir);
+ if (!pkt_dev->entry) {
printk("pktgen: cannot create %s/%s procfs entry.\n",
PG_PROC_DIR, ifname);
- if (pkt_dev->flows)
- vfree(pkt_dev->flows);
- kfree(pkt_dev);
- return -EINVAL;
+ err = -EINVAL;
+ goto out2;
}
- pe->proc_fops = &pktgen_if_fops;
- pe->data = pkt_dev;
+ pkt_dev->entry->proc_fops = &pktgen_if_fops;
+ pkt_dev->entry->data = pkt_dev;
return add_dev_to_thread(t, pkt_dev);
+out2:
+ dev_put(pkt_dev->odev);
+out1:
+ if (pkt_dev->flows)
+ vfree(pkt_dev->flows);
+ kfree(pkt_dev);
+ return err;
}
static int __init pktgen_create_thread(int cpu)
@@ -3530,7 +3524,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
struct pktgen_dev *pkt_dev)
{
- PG_DEBUG(printk("pktgen: remove_device pkt_dev=%p\n", pkt_dev));
+ pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
if (pkt_dev->running) {
printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n");
@@ -3548,9 +3542,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
_rem_dev_from_if_list(t, pkt_dev);
- /* Clean up proc file system */
-
- remove_proc_entry(pkt_dev->ifname, pg_proc_dir);
+ if (pkt_dev->entry)
+ remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
if (pkt_dev->flows)
vfree(pkt_dev->flows);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6055074c4b81..8c971a2efe2a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,11 +50,13 @@
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/fib_rules.h>
-#include <net/netlink.h>
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-#include <linux/wireless.h>
-#include <net/iw_handler.h>
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
+#include <net/rtnetlink.h>
+
+struct rtnl_link
+{
+ rtnl_doit_func doit;
+ rtnl_dumpit_func dumpit;
+};
static DEFINE_MUTEX(rtnl_mutex);
static struct sock *rtnl;
@@ -95,7 +97,151 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
return 0;
}
-struct rtnetlink_link * rtnetlink_links[NPROTO];
+static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+
+static inline int rtm_msgindex(int msgtype)
+{
+ int msgindex = msgtype - RTM_BASE;
+
+ /*
+ * msgindex < 0 implies someone tried to register a netlink
+ * control code. msgindex >= RTM_NR_MSGTYPES may indicate that
+ * the message type has not been added to linux/rtnetlink.h
+ */
+ BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES);
+
+ return msgindex;
+}
+
+static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
+{
+ struct rtnl_link *tab;
+
+ tab = rtnl_msg_handlers[protocol];
+ if (tab == NULL || tab[msgindex].doit == NULL)
+ tab = rtnl_msg_handlers[PF_UNSPEC];
+
+ return tab ? tab[msgindex].doit : NULL;
+}
+
+static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
+{
+ struct rtnl_link *tab;
+
+ tab = rtnl_msg_handlers[protocol];
+ if (tab == NULL || tab[msgindex].dumpit == NULL)
+ tab = rtnl_msg_handlers[PF_UNSPEC];
+
+ return tab ? tab[msgindex].dumpit : NULL;
+}
+
+/**
+ * __rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_register(int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+ struct rtnl_link *tab;
+ int msgindex;
+
+ BUG_ON(protocol < 0 || protocol >= NPROTO);
+ msgindex = rtm_msgindex(msgtype);
+
+ tab = rtnl_msg_handlers[protocol];
+ if (tab == NULL) {
+ tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
+ if (tab == NULL)
+ return -ENOBUFS;
+
+ rtnl_msg_handlers[protocol] = tab;
+ }
+
+ if (doit)
+ tab[msgindex].doit = doit;
+
+ if (dumpit)
+ tab[msgindex].dumpit = dumpit;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(__rtnl_register);
+
+/**
+ * rtnl_register - Register a rtnetlink message type
+ *
+ * Identical to __rtnl_register() but panics on failure. This is useful
+ * as failure of this function is very unlikely, it can only happen due
+ * to lack of memory when allocating the chain to store all message
+ * handlers for a protocol. Meant for use in init functions where lack
+ * of memory implies no sense in continuing.
+ */
+void rtnl_register(int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+ if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
+ panic("Unable to register rtnetlink message handler, "
+ "protocol = %d, message type = %d\n",
+ protocol, msgtype);
+}
+
+EXPORT_SYMBOL_GPL(rtnl_register);
+
+/**
+ * rtnl_unregister - Unregister a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_unregister(int protocol, int msgtype)
+{
+ int msgindex;
+
+ BUG_ON(protocol < 0 || protocol >= NPROTO);
+ msgindex = rtm_msgindex(msgtype);
+
+ if (rtnl_msg_handlers[protocol] == NULL)
+ return -ENOENT;
+
+ rtnl_msg_handlers[protocol][msgindex].doit = NULL;
+ rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_unregister);
+
+/**
+ * rtnl_unregister_all - Unregister all rtnetlink message types of a protocol
+ * @protocol: Protocol family or PF_UNSPEC
+ *
+ * Identical to calling rtnl_unregister() for all registered message types
+ * of a certain protocol family.
+ */
+void rtnl_unregister_all(int protocol)
+{
+ BUG_ON(protocol < 0 || protocol >= NPROTO);
+
+ kfree(rtnl_msg_handlers[protocol]);
+ rtnl_msg_handlers[protocol] = NULL;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_unregister_all);
static const int rtm_min[RTM_NR_FAMILIES] =
{
@@ -249,7 +395,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
operstate == IF_OPER_UNKNOWN)
operstate = IF_OPER_DORMANT;
break;
- };
+ }
if (dev->operstate != operstate) {
write_lock_bh(&dev_base_lock);
@@ -393,16 +539,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
int s_idx = cb->args[0];
struct net_device *dev;
- read_lock(&dev_base_lock);
- for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
break;
+cont:
+ idx++;
}
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
return skb->len;
@@ -536,17 +683,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
modified = 1;
}
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
- if (tb[IFLA_WIRELESS]) {
- /* Call Wireless Extensions.
- * Various stuff checked in there... */
- err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
- nla_len(tb[IFLA_WIRELESS]));
- if (err < 0)
- goto errout_dev;
- }
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
-
if (tb[IFLA_BROADCAST]) {
nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
send_addr_notify = 1;
@@ -610,21 +746,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
} else
return -EINVAL;
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
- if (tb[IFLA_WIRELESS]) {
- /* Call Wireless Extensions. We need to know the size before
- * we can alloc. Various stuff checked in there... */
- err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
- nla_len(tb[IFLA_WIRELESS]),
- &iw_buf, &iw_buf_len);
- if (err < 0)
- goto errout;
-
- iw += IW_EV_POINT_OFF;
- }
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
-
nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
if (nskb == NULL) {
err = -ENOBUFS;
@@ -658,12 +779,12 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
int type = cb->nlh->nlmsg_type-RTM_BASE;
if (idx < s_idx || idx == PF_PACKET)
continue;
- if (rtnetlink_links[idx] == NULL ||
- rtnetlink_links[idx][type].dumpit == NULL)
+ if (rtnl_msg_handlers[idx] == NULL ||
+ rtnl_msg_handlers[idx][type].dumpit == NULL)
continue;
if (idx > s_idx)
memset(&cb->args[0], 0, sizeof(cb->args));
- if (rtnetlink_links[idx][type].dumpit(skb, cb))
+ if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
break;
}
cb->family = idx;
@@ -699,30 +820,18 @@ static int rtattr_max;
/* Process one rtnetlink message. */
-static __inline__ int
-rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- struct rtnetlink_link *link;
- struct rtnetlink_link *link_tab;
+ rtnl_doit_func doit;
int sz_idx, kind;
int min_len;
int family;
int type;
int err;
- /* Only requests are handled by kernel now */
- if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
- return 0;
-
type = nlh->nlmsg_type;
-
- /* A control message: ignore them */
- if (type < RTM_BASE)
- return 0;
-
- /* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
- goto err_inval;
+ return -EOPNOTSUPP;
type -= RTM_BASE;
@@ -731,45 +840,33 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
return 0;
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
- if (family >= NPROTO) {
- *errp = -EAFNOSUPPORT;
- return -1;
- }
-
- link_tab = rtnetlink_links[family];
- if (link_tab == NULL)
- link_tab = rtnetlink_links[PF_UNSPEC];
- link = &link_tab[type];
+ if (family >= NPROTO)
+ return -EAFNOSUPPORT;
sz_idx = type>>2;
kind = type&3;
- if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) {
- *errp = -EPERM;
- return -1;
- }
+ if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
+ return -EPERM;
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
- if (link->dumpit == NULL)
- link = &(rtnetlink_links[PF_UNSPEC][type]);
-
- if (link->dumpit == NULL)
- goto err_inval;
+ rtnl_dumpit_func dumpit;
- if ((*errp = netlink_dump_start(rtnl, skb, nlh,
- link->dumpit, NULL)) != 0) {
- return -1;
- }
+ dumpit = rtnl_get_dumpit(family, type);
+ if (dumpit == NULL)
+ return -EOPNOTSUPP;
- netlink_queue_skip(nlh, skb);
- return -1;
+ __rtnl_unlock();
+ err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+ rtnl_lock();
+ return err;
}
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
min_len = rtm_min[sz_idx];
if (nlh->nlmsg_len < min_len)
- goto err_inval;
+ return -EINVAL;
if (nlh->nlmsg_len > min_len) {
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -779,25 +876,18 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
unsigned flavor = attr->rta_type;
if (flavor) {
if (flavor > rta_max[sz_idx])
- goto err_inval;
+ return -EINVAL;
rta_buf[flavor-1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
}
}
- if (link->doit == NULL)
- link = &(rtnetlink_links[PF_UNSPEC][type]);
- if (link->doit == NULL)
- goto err_inval;
- err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+ doit = rtnl_get_doit(family, type);
+ if (doit == NULL)
+ return -EOPNOTSUPP;
- *errp = err;
- return err;
-
-err_inval:
- *errp = -EINVAL;
- return -1;
+ return doit(skb, nlh, (void *)&rta_buf[0]);
}
static void rtnetlink_rcv(struct sock *sk, int len)
@@ -813,25 +903,6 @@ static void rtnetlink_rcv(struct sock *sk, int len)
} while (qlen);
}
-static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
- [RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink,
- .dumpit = rtnl_dump_ifinfo },
- [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink },
- [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all },
- [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all },
- [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add },
- [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete },
- [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info },
-#ifdef CONFIG_FIB_RULES
- [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule },
- [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule },
-#endif
- [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all },
- [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info },
- [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set },
-};
-
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
@@ -873,19 +944,22 @@ void __init rtnetlink_init(void)
panic("rtnetlink_init: cannot allocate rta_buf\n");
rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
- THIS_MODULE);
+ &rtnl_mutex, THIS_MODULE);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
- rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
- rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
+
+ rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
+ rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
+
+ rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
+ rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
}
EXPORT_SYMBOL(__rta_fill);
EXPORT_SYMBOL(rtattr_strlcpy);
EXPORT_SYMBOL(rtattr_parse);
-EXPORT_SYMBOL(rtnetlink_links);
EXPORT_SYMBOL(rtnetlink_put_metrics);
EXPORT_SYMBOL(rtnl_lock);
EXPORT_SYMBOL(rtnl_trylock);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 820761f9eeef..142257307fa2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
+#include <linux/scatterlist.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -87,8 +88,9 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
- "data:%p tail:%p end:%p dev:%s\n",
- here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+ "data:%p tail:%#lx end:%#lx dev:%s\n",
+ here, skb->len, sz, skb->head, skb->data,
+ (unsigned long)skb->tail, (unsigned long)skb->end,
skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
@@ -105,8 +107,9 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
- "data:%p tail:%p end:%p dev:%s\n",
- here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+ "data:%p tail:%#lx end:%#lx dev:%s\n",
+ here, skb->len, sz, skb->head, skb->data,
+ (unsigned long)skb->tail, (unsigned long)skb->end,
skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
@@ -155,20 +158,22 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
if (!skb)
goto out;
- /* Get the DATA. Size must match skb_add_mtu(). */
size = SKB_DATA_ALIGN(size);
data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
gfp_mask, node);
if (!data)
goto nodata;
- memset(skb, 0, offsetof(struct sk_buff, truesize));
+ /*
+ * See comment in sk_buff definition, just before the 'tail' member
+ */
+ memset(skb, 0, offsetof(struct sk_buff, tail));
skb->truesize = size + sizeof(struct sk_buff);
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
- skb->tail = data;
- skb->end = data + size;
+ skb_reset_tail_pointer(skb);
+ skb->end = skb->tail + size;
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
atomic_set(&shinfo->dataref, 1);
@@ -197,61 +202,6 @@ nodata:
}
/**
- * alloc_skb_from_cache - allocate a network buffer
- * @cp: kmem_cache from which to allocate the data area
- * (object size must be big enough for @size bytes + skb overheads)
- * @size: size to allocate
- * @gfp_mask: allocation mask
- *
- * Allocate a new &sk_buff. The returned buffer has no headroom and
- * tail room of size bytes. The object has a reference count of one.
- * The return is the buffer. On a failure the return is %NULL.
- *
- * Buffers may only be allocated from interrupts using a @gfp_mask of
- * %GFP_ATOMIC.
- */
-struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp,
- unsigned int size,
- gfp_t gfp_mask)
-{
- struct sk_buff *skb;
- u8 *data;
-
- /* Get the HEAD */
- skb = kmem_cache_alloc(skbuff_head_cache,
- gfp_mask & ~__GFP_DMA);
- if (!skb)
- goto out;
-
- /* Get the DATA. */
- size = SKB_DATA_ALIGN(size);
- data = kmem_cache_alloc(cp, gfp_mask);
- if (!data)
- goto nodata;
-
- memset(skb, 0, offsetof(struct sk_buff, truesize));
- skb->truesize = size + sizeof(struct sk_buff);
- atomic_set(&skb->users, 1);
- skb->head = data;
- skb->data = data;
- skb->tail = data;
- skb->end = data + size;
-
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_segs = 0;
- skb_shinfo(skb)->gso_type = 0;
- skb_shinfo(skb)->frag_list = NULL;
-out:
- return skb;
-nodata:
- kmem_cache_free(skbuff_head_cache, skb);
- skb = NULL;
- goto out;
-}
-
-/**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
* @dev: network device to receive on
* @length: length to allocate
@@ -354,7 +304,7 @@ void kfree_skbmem(struct sk_buff *skb)
if (atomic_dec_and_test(fclone_ref))
kmem_cache_free(skbuff_fclone_cache, other);
break;
- };
+ }
}
/**
@@ -376,15 +326,13 @@ void __kfree_skb(struct sk_buff *skb)
WARN_ON(in_irq());
skb->destructor(skb);
}
-#ifdef CONFIG_NETFILTER
- nf_conntrack_put(skb->nfct);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ nf_conntrack_put(skb->nfct);
nf_conntrack_put_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(skb->nf_bridge);
#endif
-#endif
/* XXX: IS this still necessary? - JHS */
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
@@ -451,9 +399,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n->sk = NULL;
C(tstamp);
C(dev);
- C(h);
- C(nh);
- C(mac);
+ C(transport_header);
+ C(network_header);
+ C(mac_header);
C(dst);
dst_clone(skb->dst);
C(sp);
@@ -463,6 +411,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
memcpy(n->cb, skb->cb, sizeof(skb->cb));
C(len);
C(data_len);
+ C(mac_len);
C(csum);
C(local_df);
n->cloned = 1;
@@ -476,26 +425,14 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
C(protocol);
n->destructor = NULL;
C(mark);
-#ifdef CONFIG_NETFILTER
- C(nfct);
- nf_conntrack_get(skb->nfct);
- C(nfctinfo);
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- C(nfct_reasm);
- nf_conntrack_get_reasm(skb->nfct_reasm);
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- C(nf_bridge);
- nf_bridge_get(skb->nf_bridge);
-#endif
-#endif /*CONFIG_NETFILTER*/
+ __nf_copy(n, skb);
#ifdef CONFIG_NET_SCHED
C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
- C(input_dev);
+ C(iif);
#endif
skb_copy_secmark(n, skb);
#endif
@@ -514,11 +451,12 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
/*
* Shift between the two data areas in bytes
*/
unsigned long offset = new->data - old->data;
-
+#endif
new->sk = NULL;
new->dev = old->dev;
new->priority = old->priority;
@@ -527,9 +465,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_INET
new->sp = secpath_get(old->sp);
#endif
- new->h.raw = old->h.raw + offset;
- new->nh.raw = old->nh.raw + offset;
- new->mac.raw = old->mac.raw + offset;
+ new->transport_header = old->transport_header;
+ new->network_header = old->network_header;
+ new->mac_header = old->mac_header;
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
+ /* {transport,network,mac}_header are relative to skb->head */
+ new->transport_header += offset;
+ new->network_header += offset;
+ new->mac_header += offset;
+#endif
memcpy(new->cb, old->cb, sizeof(old->cb));
new->local_df = old->local_df;
new->fclone = SKB_FCLONE_UNAVAILABLE;
@@ -537,22 +481,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->tstamp = old->tstamp;
new->destructor = NULL;
new->mark = old->mark;
-#ifdef CONFIG_NETFILTER
- new->nfct = old->nfct;
- nf_conntrack_get(old->nfct);
- new->nfctinfo = old->nfctinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- new->nfct_reasm = old->nfct_reasm;
- nf_conntrack_get_reasm(old->nfct_reasm);
-#endif
+ __nf_copy(new, old);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
new->ipvs_property = old->ipvs_property;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- new->nf_bridge = old->nf_bridge;
- nf_bridge_get(old->nf_bridge);
-#endif
-#endif
#ifdef CONFIG_NET_SCHED
#ifdef CONFIG_NET_CLS_ACT
new->tc_verd = old->tc_verd;
@@ -589,8 +521,12 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
/*
* Allocate the copy buffer
*/
- struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
- gfp_mask);
+ struct sk_buff *n;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ n = alloc_skb(skb->end + skb->data_len, gfp_mask);
+#else
+ n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+#endif
if (!n)
return NULL;
@@ -627,8 +563,12 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
/*
* Allocate the copy buffer
*/
- struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
-
+ struct sk_buff *n;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ n = alloc_skb(skb->end, gfp_mask);
+#else
+ n = alloc_skb(skb->end - skb->head, gfp_mask);
+#endif
if (!n)
goto out;
@@ -637,7 +577,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
/* Set the tail pointer and length */
skb_put(n, skb_headlen(skb));
/* Copy the bytes */
- memcpy(n->data, skb->data, n->len);
+ skb_copy_from_linear_data(skb, n->data, n->len);
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
@@ -686,7 +626,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
{
int i;
u8 *data;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ int size = nhead + skb->end + ntail;
+#else
int size = nhead + (skb->end - skb->head) + ntail;
+#endif
long off;
if (skb_shared(skb))
@@ -700,8 +644,14 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
/* Copy only real data... and, alas, header. This should be
* optimized for the cases when header is void. */
- memcpy(data + nhead, skb->head, skb->tail - skb->head);
- memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
+ memcpy(data + nhead, skb->head,
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->tail);
+#else
+ skb->tail - skb->head);
+#endif
+ memcpy(data + size, skb_end_pointer(skb),
+ sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
get_page(skb_shinfo(skb)->frags[i].page);
@@ -714,12 +664,18 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
off = (data + nhead) - skb->head;
skb->head = data;
- skb->end = data + size;
skb->data += off;
- skb->tail += off;
- skb->mac.raw += off;
- skb->h.raw += off;
- skb->nh.raw += off;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->end = size;
+ off = nhead;
+#else
+ skb->end = skb->head + size;
+#endif
+ /* {transport,network,mac}_header and tail are relative to skb->head */
+ skb->tail += off;
+ skb->transport_header += off;
+ skb->network_header += off;
+ skb->mac_header += off;
skb->cloned = 0;
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
@@ -780,7 +736,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
*/
struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
gfp_mask);
+ int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
+ int off = 0;
if (!n)
return NULL;
@@ -790,7 +748,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
/* Set the tail pointer and length */
skb_put(n, skb->len);
- head_copy_len = skb_headroom(skb);
+ head_copy_len = oldheadroom;
head_copy_off = 0;
if (newheadroom <= head_copy_len)
head_copy_len = newheadroom;
@@ -804,6 +762,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
copy_skb_header(n, skb);
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ off = newheadroom - oldheadroom;
+#endif
+ n->transport_header += off;
+ n->network_header += off;
+ n->mac_header += off;
+
return n;
}
@@ -931,7 +896,7 @@ done:
} else {
skb->len = len;
skb->data_len = 0;
- skb->tail = skb->data + len;
+ skb_set_tail_pointer(skb, len);
}
return 0;
@@ -976,7 +941,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
return NULL;
}
- if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+ if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
BUG();
/* Optimization: no fragments, no reasons to preestimate
@@ -1072,7 +1037,7 @@ pull_pages:
skb->tail += delta;
skb->data_len -= delta;
- return skb->tail;
+ return skb_tail_pointer(skb);
}
/* Copy some data bits from skb to kernel buffer. */
@@ -1089,7 +1054,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
if ((copy = start - offset) > 0) {
if (copy > len)
copy = len;
- memcpy(to, skb->data + offset, copy);
+ skb_copy_from_linear_data_offset(skb, offset, to, copy);
if ((len -= copy) == 0)
return 0;
offset += copy;
@@ -1164,7 +1129,7 @@ fault:
* traversing fragment lists and such.
*/
-int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
+int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
{
int i, copy;
int start = skb_headlen(skb);
@@ -1175,7 +1140,7 @@ int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
if ((copy = start - offset) > 0) {
if (copy > len)
copy = len;
- memcpy(skb->data + offset, from, copy);
+ skb_copy_to_linear_data_offset(skb, offset, from, copy);
if ((len -= copy) == 0)
return 0;
offset += copy;
@@ -1402,13 +1367,13 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
long csstart;
if (skb->ip_summed == CHECKSUM_PARTIAL)
- csstart = skb->h.raw - skb->data;
+ csstart = skb->csum_start - skb_headroom(skb);
else
csstart = skb_headlen(skb);
BUG_ON(csstart > skb_headlen(skb));
- memcpy(to, skb->data, csstart);
+ skb_copy_from_linear_data(skb, to, csstart);
csum = 0;
if (csstart != skb->len)
@@ -1576,27 +1541,14 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
spin_unlock_irqrestore(&list->lock, flags);
}
-#if 0
-/*
- * Tune the memory allocator for a new MTU size.
- */
-void skb_add_mtu(int mtu)
-{
- /* Must match allocation in alloc_skb */
- mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
-
- kmem_add_cache_size(mtu);
-}
-#endif
-
static inline void skb_split_inside_header(struct sk_buff *skb,
struct sk_buff* skb1,
const u32 len, const int pos)
{
int i;
- memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
-
+ skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
+ pos - len);
/* And move data appendix as is. */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
@@ -1607,7 +1559,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb,
skb1->len += skb1->data_len;
skb->data_len = 0;
skb->len = len;
- skb->tail = skb->data + len;
+ skb_set_tail_pointer(skb, len);
}
static inline void skb_split_no_header(struct sk_buff *skb,
@@ -1932,7 +1884,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
unsigned int mss = skb_shinfo(skb)->gso_size;
- unsigned int doffset = skb->data - skb->mac.raw;
+ unsigned int doffset = skb->data - skb_mac_header(skb);
unsigned int offset = doffset;
unsigned int headroom;
unsigned int len;
@@ -1982,11 +1934,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
nskb->mac_len = skb->mac_len;
skb_reserve(nskb, headroom);
- nskb->mac.raw = nskb->data;
- nskb->nh.raw = nskb->data + skb->mac_len;
- nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
- memcpy(skb_put(nskb, doffset), skb->data, doffset);
-
+ skb_reset_mac_header(nskb);
+ skb_set_network_header(nskb, skb->mac_len);
+ nskb->transport_header = (nskb->network_header +
+ skb_network_header_len(skb));
+ skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
+ doffset);
if (!sg) {
nskb->csum = skb_copy_and_csum_bits(skb, offset,
skb_put(nskb, len),
@@ -1999,7 +1952,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum = skb->csum;
- memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+ skb_copy_from_linear_data_offset(skb, offset,
+ skb_put(nskb, hsize), hsize);
while (pos < offset + len) {
BUG_ON(i >= nfrags);
@@ -2059,6 +2013,190 @@ void __init skb_init(void)
NULL, NULL);
}
+/**
+ * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
+ * @skb: Socket buffer containing the buffers to be mapped
+ * @sg: The scatter-gather list to map into
+ * @offset: The offset into the buffer's contents to start mapping
+ * @len: Length of buffer space to be mapped
+ *
+ * Fill the specified scatter-gather list with mappings/pointers into a
+ * region of the buffer space attached to a socket buffer.
+ */
+int
+skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+ int start = skb_headlen(skb);
+ int i, copy = start - offset;
+ int elt = 0;
+
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ sg[elt].page = virt_to_page(skb->data + offset);
+ sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+ sg[elt].length = copy;
+ elt++;
+ if ((len -= copy) == 0)
+ return elt;
+ offset += copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end - offset) > 0) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ if (copy > len)
+ copy = len;
+ sg[elt].page = frag->page;
+ sg[elt].offset = frag->page_offset+offset-start;
+ sg[elt].length = copy;
+ elt++;
+ if (!(len -= copy))
+ return elt;
+ offset += copy;
+ }
+ start = end;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+ if ((len -= copy) == 0)
+ return elt;
+ offset += copy;
+ }
+ start = end;
+ }
+ }
+ BUG_ON(len);
+ return elt;
+}
+
+/**
+ * skb_cow_data - Check that a socket buffer's data buffers are writable
+ * @skb: The socket buffer to check.
+ * @tailbits: Amount of trailing space to be added
+ * @trailer: Returned pointer to the skb where the @tailbits space begins
+ *
+ * Make sure that the data buffers attached to a socket buffer are
+ * writable. If they are not, private copies are made of the data buffers
+ * and the socket buffer is set to use these instead.
+ *
+ * If @tailbits is given, make sure that there is space to write @tailbits
+ * bytes of data beyond current end of socket buffer. @trailer will be
+ * set to point to the skb in which this space begins.
+ *
+ * The number of scatterlist elements required to completely map the
+ * COW'd and extended socket buffer will be returned.
+ */
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
+{
+ int copyflag;
+ int elt;
+ struct sk_buff *skb1, **skb_p;
+
+ /* If skb is cloned or its head is paged, reallocate
+ * head pulling out all the pages (pages are considered not writable
+ * at the moment even if they are anonymous).
+ */
+ if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
+ __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+ return -ENOMEM;
+
+ /* Easy case. Most of packets will go this way. */
+ if (!skb_shinfo(skb)->frag_list) {
+ /* A little of trouble, not enough of space for trailer.
+ * This should not happen, when stack is tuned to generate
+ * good frames. OK, on miss we reallocate and reserve even more
+ * space, 128 bytes is fair. */
+
+ if (skb_tailroom(skb) < tailbits &&
+ pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
+ return -ENOMEM;
+
+ /* Voila! */
+ *trailer = skb;
+ return 1;
+ }
+
+ /* Misery. We are in troubles, going to mincer fragments... */
+
+ elt = 1;
+ skb_p = &skb_shinfo(skb)->frag_list;
+ copyflag = 0;
+
+ while ((skb1 = *skb_p) != NULL) {
+ int ntail = 0;
+
+ /* The fragment is partially pulled by someone,
+ * this can happen on input. Copy it and everything
+ * after it. */
+
+ if (skb_shared(skb1))
+ copyflag = 1;
+
+ /* If the skb is the last, worry about trailer. */
+
+ if (skb1->next == NULL && tailbits) {
+ if (skb_shinfo(skb1)->nr_frags ||
+ skb_shinfo(skb1)->frag_list ||
+ skb_tailroom(skb1) < tailbits)
+ ntail = tailbits + 128;
+ }
+
+ if (copyflag ||
+ skb_cloned(skb1) ||
+ ntail ||
+ skb_shinfo(skb1)->nr_frags ||
+ skb_shinfo(skb1)->frag_list) {
+ struct sk_buff *skb2;
+
+ /* Fuck, we are miserable poor guys... */
+ if (ntail == 0)
+ skb2 = skb_copy(skb1, GFP_ATOMIC);
+ else
+ skb2 = skb_copy_expand(skb1,
+ skb_headroom(skb1),
+ ntail,
+ GFP_ATOMIC);
+ if (unlikely(skb2 == NULL))
+ return -ENOMEM;
+
+ if (skb1->sk)
+ skb_set_owner_w(skb2, skb1->sk);
+
+ /* Looking around. Are we still alive?
+ * OK, link new skb, drop old one */
+
+ skb2->next = skb1->next;
+ *skb_p = skb2;
+ kfree_skb(skb1);
+ skb1 = skb2;
+ }
+ elt++;
+ *trailer = skb1;
+ skb_p = &skb1->next;
+ }
+
+ return elt;
+}
+
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(kfree_skb);
@@ -2093,3 +2231,6 @@ EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);
EXPORT_SYMBOL(skb_append_datato_frags);
+
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+EXPORT_SYMBOL_GPL(skb_cow_data);
diff --git a/net/core/sock.c b/net/core/sock.c
index 8d65d6478dcd..22183c2ef284 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,8 @@ static const char *af_family_key_strings[AF_MAX+1] = {
"sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
"sk_lock-27" , "sk_lock-28" , "sk_lock-29" ,
- "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
+ "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
+ "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -167,7 +168,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
"slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
"slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
"slock-27" , "slock-28" , "slock-29" ,
- "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX"
+ "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
+ "slock-AF_RXRPC" , "slock-AF_MAX"
};
#endif
@@ -361,8 +363,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
}
#endif
- if(optlen<sizeof(int))
- return(-EINVAL);
+ if (optlen < sizeof(int))
+ return -EINVAL;
if (get_user(val, (int __user *)optval))
return -EFAULT;
@@ -371,265 +373,270 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
- switch(optname)
- {
- case SO_DEBUG:
- if(val && !capable(CAP_NET_ADMIN))
- {
- ret = -EACCES;
- }
- else if (valbool)
- sock_set_flag(sk, SOCK_DBG);
- else
- sock_reset_flag(sk, SOCK_DBG);
- break;
- case SO_REUSEADDR:
- sk->sk_reuse = valbool;
- break;
- case SO_TYPE:
- case SO_ERROR:
- ret = -ENOPROTOOPT;
- break;
- case SO_DONTROUTE:
- if (valbool)
- sock_set_flag(sk, SOCK_LOCALROUTE);
- else
- sock_reset_flag(sk, SOCK_LOCALROUTE);
- break;
- case SO_BROADCAST:
- sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
- break;
- case SO_SNDBUF:
- /* Don't error on this BSD doesn't and if you think
- about it this is right. Otherwise apps have to
- play 'guess the biggest size' games. RCVBUF/SNDBUF
- are treated in BSD as hints */
-
- if (val > sysctl_wmem_max)
- val = sysctl_wmem_max;
+ switch(optname) {
+ case SO_DEBUG:
+ if (val && !capable(CAP_NET_ADMIN)) {
+ ret = -EACCES;
+ }
+ else if (valbool)
+ sock_set_flag(sk, SOCK_DBG);
+ else
+ sock_reset_flag(sk, SOCK_DBG);
+ break;
+ case SO_REUSEADDR:
+ sk->sk_reuse = valbool;
+ break;
+ case SO_TYPE:
+ case SO_ERROR:
+ ret = -ENOPROTOOPT;
+ break;
+ case SO_DONTROUTE:
+ if (valbool)
+ sock_set_flag(sk, SOCK_LOCALROUTE);
+ else
+ sock_reset_flag(sk, SOCK_LOCALROUTE);
+ break;
+ case SO_BROADCAST:
+ sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
+ break;
+ case SO_SNDBUF:
+ /* Don't error on this BSD doesn't and if you think
+ about it this is right. Otherwise apps have to
+ play 'guess the biggest size' games. RCVBUF/SNDBUF
+ are treated in BSD as hints */
+
+ if (val > sysctl_wmem_max)
+ val = sysctl_wmem_max;
set_sndbuf:
- sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- if ((val * 2) < SOCK_MIN_SNDBUF)
- sk->sk_sndbuf = SOCK_MIN_SNDBUF;
- else
- sk->sk_sndbuf = val * 2;
+ sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ if ((val * 2) < SOCK_MIN_SNDBUF)
+ sk->sk_sndbuf = SOCK_MIN_SNDBUF;
+ else
+ sk->sk_sndbuf = val * 2;
- /*
- * Wake up sending tasks if we
- * upped the value.
- */
- sk->sk_write_space(sk);
- break;
+ /*
+ * Wake up sending tasks if we
+ * upped the value.
+ */
+ sk->sk_write_space(sk);
+ break;
- case SO_SNDBUFFORCE:
- if (!capable(CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
- goto set_sndbuf;
+ case SO_SNDBUFFORCE:
+ if (!capable(CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+ goto set_sndbuf;
- case SO_RCVBUF:
- /* Don't error on this BSD doesn't and if you think
- about it this is right. Otherwise apps have to
- play 'guess the biggest size' games. RCVBUF/SNDBUF
- are treated in BSD as hints */
+ case SO_RCVBUF:
+ /* Don't error on this BSD doesn't and if you think
+ about it this is right. Otherwise apps have to
+ play 'guess the biggest size' games. RCVBUF/SNDBUF
+ are treated in BSD as hints */
- if (val > sysctl_rmem_max)
- val = sysctl_rmem_max;
+ if (val > sysctl_rmem_max)
+ val = sysctl_rmem_max;
set_rcvbuf:
- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- /*
- * We double it on the way in to account for
- * "struct sk_buff" etc. overhead. Applications
- * assume that the SO_RCVBUF setting they make will
- * allow that much actual data to be received on that
- * socket.
- *
- * Applications are unaware that "struct sk_buff" and
- * other overheads allocate from the receive buffer
- * during socket buffer allocation.
- *
- * And after considering the possible alternatives,
- * returning the value we actually used in getsockopt
- * is the most desirable behavior.
- */
- if ((val * 2) < SOCK_MIN_RCVBUF)
- sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
- else
- sk->sk_rcvbuf = val * 2;
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ /*
+ * We double it on the way in to account for
+ * "struct sk_buff" etc. overhead. Applications
+ * assume that the SO_RCVBUF setting they make will
+ * allow that much actual data to be received on that
+ * socket.
+ *
+ * Applications are unaware that "struct sk_buff" and
+ * other overheads allocate from the receive buffer
+ * during socket buffer allocation.
+ *
+ * And after considering the possible alternatives,
+ * returning the value we actually used in getsockopt
+ * is the most desirable behavior.
+ */
+ if ((val * 2) < SOCK_MIN_RCVBUF)
+ sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
+ else
+ sk->sk_rcvbuf = val * 2;
+ break;
+
+ case SO_RCVBUFFORCE:
+ if (!capable(CAP_NET_ADMIN)) {
+ ret = -EPERM;
break;
+ }
+ goto set_rcvbuf;
- case SO_RCVBUFFORCE:
- if (!capable(CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
- goto set_rcvbuf;
-
- case SO_KEEPALIVE:
+ case SO_KEEPALIVE:
#ifdef CONFIG_INET
- if (sk->sk_protocol == IPPROTO_TCP)
- tcp_set_keepalive(sk, valbool);
+ if (sk->sk_protocol == IPPROTO_TCP)
+ tcp_set_keepalive(sk, valbool);
#endif
- sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
- break;
-
- case SO_OOBINLINE:
- sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+ sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+ break;
+
+ case SO_OOBINLINE:
+ sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+ break;
+
+ case SO_NO_CHECK:
+ sk->sk_no_check = valbool;
+ break;
+
+ case SO_PRIORITY:
+ if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+ sk->sk_priority = val;
+ else
+ ret = -EPERM;
+ break;
+
+ case SO_LINGER:
+ if (optlen < sizeof(ling)) {
+ ret = -EINVAL; /* 1003.1g */
break;
-
- case SO_NO_CHECK:
- sk->sk_no_check = valbool;
- break;
-
- case SO_PRIORITY:
- if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
- sk->sk_priority = val;
- else
- ret = -EPERM;
+ }
+ if (copy_from_user(&ling,optval,sizeof(ling))) {
+ ret = -EFAULT;
break;
-
- case SO_LINGER:
- if(optlen<sizeof(ling)) {
- ret = -EINVAL; /* 1003.1g */
- break;
- }
- if (copy_from_user(&ling,optval,sizeof(ling))) {
- ret = -EFAULT;
- break;
- }
- if (!ling.l_onoff)
- sock_reset_flag(sk, SOCK_LINGER);
- else {
+ }
+ if (!ling.l_onoff)
+ sock_reset_flag(sk, SOCK_LINGER);
+ else {
#if (BITS_PER_LONG == 32)
- if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
- sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
- else
+ if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
+ sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+ else
#endif
- sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
- sock_set_flag(sk, SOCK_LINGER);
- }
- break;
-
- case SO_BSDCOMPAT:
- sock_warn_obsolete_bsdism("setsockopt");
- break;
-
- case SO_PASSCRED:
- if (valbool)
- set_bit(SOCK_PASSCRED, &sock->flags);
+ sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
+ sock_set_flag(sk, SOCK_LINGER);
+ }
+ break;
+
+ case SO_BSDCOMPAT:
+ sock_warn_obsolete_bsdism("setsockopt");
+ break;
+
+ case SO_PASSCRED:
+ if (valbool)
+ set_bit(SOCK_PASSCRED, &sock->flags);
+ else
+ clear_bit(SOCK_PASSCRED, &sock->flags);
+ break;
+
+ case SO_TIMESTAMP:
+ case SO_TIMESTAMPNS:
+ if (valbool) {
+ if (optname == SO_TIMESTAMP)
+ sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
else
- clear_bit(SOCK_PASSCRED, &sock->flags);
- break;
+ sock_set_flag(sk, SOCK_RCVTSTAMPNS);
+ sock_set_flag(sk, SOCK_RCVTSTAMP);
+ sock_enable_timestamp(sk);
+ } else {
+ sock_reset_flag(sk, SOCK_RCVTSTAMP);
+ sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+ }
+ break;
- case SO_TIMESTAMP:
- if (valbool) {
- sock_set_flag(sk, SOCK_RCVTSTAMP);
- sock_enable_timestamp(sk);
- } else
- sock_reset_flag(sk, SOCK_RCVTSTAMP);
- break;
+ case SO_RCVLOWAT:
+ if (val < 0)
+ val = INT_MAX;
+ sk->sk_rcvlowat = val ? : 1;
+ break;
- case SO_RCVLOWAT:
- if (val < 0)
- val = INT_MAX;
- sk->sk_rcvlowat = val ? : 1;
- break;
+ case SO_RCVTIMEO:
+ ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+ break;
- case SO_RCVTIMEO:
- ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
- break;
+ case SO_SNDTIMEO:
+ ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+ break;
- case SO_SNDTIMEO:
- ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+#ifdef CONFIG_NETDEVICES
+ case SO_BINDTODEVICE:
+ {
+ char devname[IFNAMSIZ];
+
+ /* Sorry... */
+ if (!capable(CAP_NET_RAW)) {
+ ret = -EPERM;
break;
+ }
-#ifdef CONFIG_NETDEVICES
- case SO_BINDTODEVICE:
- {
- char devname[IFNAMSIZ];
+ /* Bind this socket to a particular device like "eth0",
+ * as specified in the passed interface name. If the
+ * name is "" or the option length is zero the socket
+ * is not bound.
+ */
- /* Sorry... */
- if (!capable(CAP_NET_RAW)) {
- ret = -EPERM;
+ if (!valbool) {
+ sk->sk_bound_dev_if = 0;
+ } else {
+ if (optlen > IFNAMSIZ - 1)
+ optlen = IFNAMSIZ - 1;
+ memset(devname, 0, sizeof(devname));
+ if (copy_from_user(devname, optval, optlen)) {
+ ret = -EFAULT;
break;
}
- /* Bind this socket to a particular device like "eth0",
- * as specified in the passed interface name. If the
- * name is "" or the option length is zero the socket
- * is not bound.
- */
+ /* Remove any cached route for this socket. */
+ sk_dst_reset(sk);
- if (!valbool) {
+ if (devname[0] == '\0') {
sk->sk_bound_dev_if = 0;
} else {
- if (optlen > IFNAMSIZ - 1)
- optlen = IFNAMSIZ - 1;
- memset(devname, 0, sizeof(devname));
- if (copy_from_user(devname, optval, optlen)) {
- ret = -EFAULT;
+ struct net_device *dev = dev_get_by_name(devname);
+ if (!dev) {
+ ret = -ENODEV;
break;
}
-
- /* Remove any cached route for this socket. */
- sk_dst_reset(sk);
-
- if (devname[0] == '\0') {
- sk->sk_bound_dev_if = 0;
- } else {
- struct net_device *dev = dev_get_by_name(devname);
- if (!dev) {
- ret = -ENODEV;
- break;
- }
- sk->sk_bound_dev_if = dev->ifindex;
- dev_put(dev);
- }
+ sk->sk_bound_dev_if = dev->ifindex;
+ dev_put(dev);
}
- break;
}
+ break;
+ }
#endif
- case SO_ATTACH_FILTER:
- ret = -EINVAL;
- if (optlen == sizeof(struct sock_fprog)) {
- struct sock_fprog fprog;
+ case SO_ATTACH_FILTER:
+ ret = -EINVAL;
+ if (optlen == sizeof(struct sock_fprog)) {
+ struct sock_fprog fprog;
- ret = -EFAULT;
- if (copy_from_user(&fprog, optval, sizeof(fprog)))
- break;
-
- ret = sk_attach_filter(&fprog, sk);
- }
- break;
-
- case SO_DETACH_FILTER:
- rcu_read_lock_bh();
- filter = rcu_dereference(sk->sk_filter);
- if (filter) {
- rcu_assign_pointer(sk->sk_filter, NULL);
- sk_filter_release(sk, filter);
- rcu_read_unlock_bh();
+ ret = -EFAULT;
+ if (copy_from_user(&fprog, optval, sizeof(fprog)))
break;
- }
+
+ ret = sk_attach_filter(&fprog, sk);
+ }
+ break;
+
+ case SO_DETACH_FILTER:
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter) {
+ rcu_assign_pointer(sk->sk_filter, NULL);
+ sk_filter_release(sk, filter);
rcu_read_unlock_bh();
- ret = -ENONET;
break;
+ }
+ rcu_read_unlock_bh();
+ ret = -ENONET;
+ break;
- case SO_PASSSEC:
- if (valbool)
- set_bit(SOCK_PASSSEC, &sock->flags);
- else
- clear_bit(SOCK_PASSSEC, &sock->flags);
- break;
+ case SO_PASSSEC:
+ if (valbool)
+ set_bit(SOCK_PASSSEC, &sock->flags);
+ else
+ clear_bit(SOCK_PASSSEC, &sock->flags);
+ break;
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
- default:
- ret = -ENOPROTOOPT;
- break;
+ default:
+ ret = -ENOPROTOOPT;
+ break;
}
release_sock(sk);
return ret;
@@ -641,8 +648,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
- union
- {
+ union {
int val;
struct linger ling;
struct timeval tm;
@@ -651,148 +657,153 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
unsigned int lv = sizeof(int);
int len;
- if(get_user(len,optlen))
+ if (get_user(len, optlen))
return -EFAULT;
- if(len < 0)
+ if (len < 0)
return -EINVAL;
- switch(optname)
- {
- case SO_DEBUG:
- v.val = sock_flag(sk, SOCK_DBG);
- break;
-
- case SO_DONTROUTE:
- v.val = sock_flag(sk, SOCK_LOCALROUTE);
- break;
-
- case SO_BROADCAST:
- v.val = !!sock_flag(sk, SOCK_BROADCAST);
- break;
-
- case SO_SNDBUF:
- v.val = sk->sk_sndbuf;
- break;
-
- case SO_RCVBUF:
- v.val = sk->sk_rcvbuf;
- break;
-
- case SO_REUSEADDR:
- v.val = sk->sk_reuse;
- break;
-
- case SO_KEEPALIVE:
- v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
- break;
-
- case SO_TYPE:
- v.val = sk->sk_type;
- break;
-
- case SO_ERROR:
- v.val = -sock_error(sk);
- if(v.val==0)
- v.val = xchg(&sk->sk_err_soft, 0);
- break;
-
- case SO_OOBINLINE:
- v.val = !!sock_flag(sk, SOCK_URGINLINE);
- break;
-
- case SO_NO_CHECK:
- v.val = sk->sk_no_check;
- break;
-
- case SO_PRIORITY:
- v.val = sk->sk_priority;
- break;
-
- case SO_LINGER:
- lv = sizeof(v.ling);
- v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
- v.ling.l_linger = sk->sk_lingertime / HZ;
- break;
-
- case SO_BSDCOMPAT:
- sock_warn_obsolete_bsdism("getsockopt");
- break;
-
- case SO_TIMESTAMP:
- v.val = sock_flag(sk, SOCK_RCVTSTAMP);
- break;
+ switch(optname) {
+ case SO_DEBUG:
+ v.val = sock_flag(sk, SOCK_DBG);
+ break;
+
+ case SO_DONTROUTE:
+ v.val = sock_flag(sk, SOCK_LOCALROUTE);
+ break;
+
+ case SO_BROADCAST:
+ v.val = !!sock_flag(sk, SOCK_BROADCAST);
+ break;
+
+ case SO_SNDBUF:
+ v.val = sk->sk_sndbuf;
+ break;
+
+ case SO_RCVBUF:
+ v.val = sk->sk_rcvbuf;
+ break;
+
+ case SO_REUSEADDR:
+ v.val = sk->sk_reuse;
+ break;
+
+ case SO_KEEPALIVE:
+ v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
+ break;
+
+ case SO_TYPE:
+ v.val = sk->sk_type;
+ break;
+
+ case SO_ERROR:
+ v.val = -sock_error(sk);
+ if (v.val==0)
+ v.val = xchg(&sk->sk_err_soft, 0);
+ break;
+
+ case SO_OOBINLINE:
+ v.val = !!sock_flag(sk, SOCK_URGINLINE);
+ break;
+
+ case SO_NO_CHECK:
+ v.val = sk->sk_no_check;
+ break;
+
+ case SO_PRIORITY:
+ v.val = sk->sk_priority;
+ break;
+
+ case SO_LINGER:
+ lv = sizeof(v.ling);
+ v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
+ v.ling.l_linger = sk->sk_lingertime / HZ;
+ break;
+
+ case SO_BSDCOMPAT:
+ sock_warn_obsolete_bsdism("getsockopt");
+ break;
+
+ case SO_TIMESTAMP:
+ v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+ !sock_flag(sk, SOCK_RCVTSTAMPNS);
+ break;
+
+ case SO_TIMESTAMPNS:
+ v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+ break;
+
+ case SO_RCVTIMEO:
+ lv=sizeof(struct timeval);
+ if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
+ v.tm.tv_sec = 0;
+ v.tm.tv_usec = 0;
+ } else {
+ v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
+ v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
+ }
+ break;
+
+ case SO_SNDTIMEO:
+ lv=sizeof(struct timeval);
+ if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
+ v.tm.tv_sec = 0;
+ v.tm.tv_usec = 0;
+ } else {
+ v.tm.tv_sec = sk->sk_sndtimeo / HZ;
+ v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
+ }
+ break;
- case SO_RCVTIMEO:
- lv=sizeof(struct timeval);
- if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
- v.tm.tv_sec = 0;
- v.tm.tv_usec = 0;
- } else {
- v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
- v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
- }
- break;
+ case SO_RCVLOWAT:
+ v.val = sk->sk_rcvlowat;
+ break;
- case SO_SNDTIMEO:
- lv=sizeof(struct timeval);
- if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
- v.tm.tv_sec = 0;
- v.tm.tv_usec = 0;
- } else {
- v.tm.tv_sec = sk->sk_sndtimeo / HZ;
- v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
- }
- break;
+ case SO_SNDLOWAT:
+ v.val=1;
+ break;
- case SO_RCVLOWAT:
- v.val = sk->sk_rcvlowat;
- break;
+ case SO_PASSCRED:
+ v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
+ break;
- case SO_SNDLOWAT:
- v.val=1;
- break;
+ case SO_PEERCRED:
+ if (len > sizeof(sk->sk_peercred))
+ len = sizeof(sk->sk_peercred);
+ if (copy_to_user(optval, &sk->sk_peercred, len))
+ return -EFAULT;
+ goto lenout;
- case SO_PASSCRED:
- v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
- break;
-
- case SO_PEERCRED:
- if (len > sizeof(sk->sk_peercred))
- len = sizeof(sk->sk_peercred);
- if (copy_to_user(optval, &sk->sk_peercred, len))
- return -EFAULT;
- goto lenout;
-
- case SO_PEERNAME:
- {
- char address[128];
-
- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
- return -ENOTCONN;
- if (lv < len)
- return -EINVAL;
- if (copy_to_user(optval, address, len))
- return -EFAULT;
- goto lenout;
- }
+ case SO_PEERNAME:
+ {
+ char address[128];
+
+ if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+ return -ENOTCONN;
+ if (lv < len)
+ return -EINVAL;
+ if (copy_to_user(optval, address, len))
+ return -EFAULT;
+ goto lenout;
+ }
- /* Dubious BSD thing... Probably nobody even uses it, but
- * the UNIX standard wants it for whatever reason... -DaveM
- */
- case SO_ACCEPTCONN:
- v.val = sk->sk_state == TCP_LISTEN;
- break;
+ /* Dubious BSD thing... Probably nobody even uses it, but
+ * the UNIX standard wants it for whatever reason... -DaveM
+ */
+ case SO_ACCEPTCONN:
+ v.val = sk->sk_state == TCP_LISTEN;
+ break;
- case SO_PASSSEC:
- v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
- break;
+ case SO_PASSSEC:
+ v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
+ break;
- case SO_PEERSEC:
- return security_socket_getpeersec_stream(sock, optval, optlen, len);
+ case SO_PEERSEC:
+ return security_socket_getpeersec_stream(sock, optval, optlen, len);
- default:
- return(-ENOPROTOOPT);
+ default:
+ return -ENOPROTOOPT;
}
+
if (len > lv)
len = lv;
if (copy_to_user(optval, &v, len))
@@ -808,7 +819,7 @@ lenout:
*
* (We also register the sk_lock with the lock validator.)
*/
-static void inline sock_lock_init(struct sock *sk)
+static inline void sock_lock_init(struct sock *sk)
{
sock_lock_init_class_and_name(sk,
af_family_slock_key_strings[sk->sk_family],
@@ -904,6 +915,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
sk_node_init(&newsk->sk_node);
sock_lock_init(newsk);
bh_lock_sock(newsk);
+ newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
atomic_set(&newsk->sk_rmem_alloc, 0);
atomic_set(&newsk->sk_wmem_alloc, 0);
@@ -923,7 +935,6 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
newsk->sk_send_head = NULL;
- newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
sock_reset_flag(newsk, SOCK_DONE);
@@ -970,6 +981,21 @@ out:
EXPORT_SYMBOL_GPL(sk_clone);
+void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
+{
+ __sk_dst_set(sk, dst);
+ sk->sk_route_caps = dst->dev->features;
+ if (sk->sk_route_caps & NETIF_F_GSO)
+ sk->sk_route_caps |= NETIF_F_GSO_MASK;
+ if (sk_can_gso(sk)) {
+ if (dst->header_len)
+ sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ else
+ sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+ }
+}
+EXPORT_SYMBOL_GPL(sk_setup_caps);
+
void __init sk_init(void)
{
if (num_physpages <= 4096) {
@@ -1220,13 +1246,13 @@ static void __lock_sock(struct sock *sk)
{
DEFINE_WAIT(wait);
- for(;;) {
+ for (;;) {
prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_bh(&sk->sk_lock.slock);
schedule();
spin_lock_bh(&sk->sk_lock.slock);
- if(!sock_owned_by_user(sk))
+ if (!sock_owned_by_user(sk))
break;
}
finish_wait(&sk->sk_lock.wq, &wait);
@@ -1258,7 +1284,7 @@ static void __release_sock(struct sock *sk)
} while (skb != NULL);
bh_lock_sock(sk);
- } while((skb = sk->sk_backlog.head) != NULL);
+ } while ((skb = sk->sk_backlog.head) != NULL);
}
/**
@@ -1420,7 +1446,7 @@ static void sock_def_write_space(struct sock *sk)
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
- if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible(sk->sk_sleep);
@@ -1482,8 +1508,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sock_set_flag(sk, SOCK_ZAPPED);
- if(sock)
- {
+ if (sock) {
sk->sk_type = sock->type;
sk->sk_sleep = &sock->wait;
sock->sk = sk;
@@ -1512,8 +1537,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
- sk->sk_stamp.tv_sec = -1L;
- sk->sk_stamp.tv_usec = -1L;
+ sk->sk_stamp = ktime_set(-1L, -1L);
atomic_set(&sk->sk_refcnt, 1);
}
@@ -1554,17 +1578,36 @@ EXPORT_SYMBOL(release_sock);
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
+ struct timeval tv;
if (!sock_flag(sk, SOCK_TIMESTAMP))
sock_enable_timestamp(sk);
- if (sk->sk_stamp.tv_sec == -1)
+ tv = ktime_to_timeval(sk->sk_stamp);
+ if (tv.tv_sec == -1)
return -ENOENT;
- if (sk->sk_stamp.tv_sec == 0)
- do_gettimeofday(&sk->sk_stamp);
- return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
- -EFAULT : 0;
+ if (tv.tv_sec == 0) {
+ sk->sk_stamp = ktime_get_real();
+ tv = ktime_to_timeval(sk->sk_stamp);
+ }
+ return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);
+int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
+{
+ struct timespec ts;
+ if (!sock_flag(sk, SOCK_TIMESTAMP))
+ sock_enable_timestamp(sk);
+ ts = ktime_to_timespec(sk->sk_stamp);
+ if (ts.tv_sec == -1)
+ return -ENOENT;
+ if (ts.tv_sec == 0) {
+ sk->sk_stamp = ktime_get_real();
+ ts = ktime_to_timespec(sk->sk_stamp);
+ }
+ return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL(sock_get_timestampns);
+
void sock_enable_timestamp(struct sock *sk)
{
if (!sock_flag(sk, SOCK_TIMESTAMP)) {
@@ -1899,7 +1942,7 @@ static int proto_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations proto_seq_ops = {
+static const struct seq_operations proto_seq_ops = {
.start = proto_seq_start,
.next = proto_seq_next,
.stop = proto_seq_stop,
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 1e75b1585460..b29712033dd4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -136,6 +136,14 @@ ctl_table core_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
+ {
+ .ctl_name = NET_CORE_WARNINGS,
+ .procname = "warnings",
+ .data = &net_msg_warn,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
{ .ctl_name = 0 }
};
diff --git a/net/core/utils.c b/net/core/utils.c
index 07236c17fab9..adecfd281ae9 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -30,8 +30,10 @@
#include <asm/system.h>
#include <asm/uaccess.h>
-int net_msg_cost = 5*HZ;
-int net_msg_burst = 10;
+int net_msg_cost __read_mostly = 5*HZ;
+int net_msg_burst __read_mostly = 10;
+int net_msg_warn __read_mostly = 1;
+EXPORT_SYMBOL(net_msg_warn);
/*
* All net warning printk()s should be guarded by this function.
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index a086c6312d3b..01030f346177 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -157,7 +157,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
if (av != NULL) {
av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
- av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1;
+ av->dccpav_buf_ackno = UINT48_MAX + 1;
av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
av->dccpav_time.tv_sec = 0;
av->dccpav_time.tv_usec = 0;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 746f79d104b3..d7d9ce737244 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -33,7 +33,6 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-
#include "../ccid.h"
#include "../dccp.h"
#include "lib/packet_history.h"
@@ -52,6 +51,9 @@ static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
static struct dccp_li_hist *ccid3_li_hist;
+/*
+ * Transmitter Half-Connection Routines
+ */
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
@@ -80,23 +82,37 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
}
/*
- * Recalculate scheduled nominal send time t_nom, inter-packet interval
- * t_ipi, and delta value. Should be called after each change to X.
+ * Compute the initial sending rate X_init according to RFC 3390:
+ * w_init = min(4 * MSS, max(2 * MSS, 4380 bytes))
+ * X_init = w_init / RTT
+ * For consistency with other parts of the code, X_init is scaled by 2^6.
*/
-static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
+static inline u64 rfc3390_initial_rate(struct sock *sk)
{
- timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+ const struct dccp_sock *dp = dccp_sk(sk);
+ const __u32 w_init = min(4 * dp->dccps_mss_cache,
+ max(2 * dp->dccps_mss_cache, 4380U));
- /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
- hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x >> 6);
+ return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt);
+}
- /* Update nominal send time with regard to the new t_ipi */
- timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+/*
+ * Recalculate t_ipi and delta (should be called whenever X changes)
+ */
+static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
+{
+ /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
+ hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
+ hctx->ccid3hctx_x);
/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
TFRC_OPSYS_HALF_TIME_GRAN);
+
+ ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n",
+ hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
+ hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
+
}
/*
* Update X by
@@ -112,19 +128,28 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
* fine-grained resolution of sending rates. This requires scaling by 2^6
* throughout the code. Only X_calc is unscaled (in bytes/second).
*
- * If X has changed, we also update the scheduled send time t_now,
- * the inter-packet interval t_ipi, and the delta value.
*/
static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
{
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ __u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
const __u64 old_x = hctx->ccid3hctx_x;
+ /*
+ * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
+ * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis.
+ * For consistency with X and X_recv, min_rate is also scaled by 2^6.
+ */
+ if (unlikely(hctx->ccid3hctx_idle)) {
+ min_rate = rfc3390_initial_rate(sk);
+ min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
+ }
+
if (hctx->ccid3hctx_p > 0) {
hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
- hctx->ccid3hctx_x_recv * 2);
+ min_rate);
hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
(((__u64)hctx->ccid3hctx_s) << 6) /
TFRC_T_MBI);
@@ -133,14 +158,21 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
(suseconds_t)hctx->ccid3hctx_rtt >= 0) {
hctx->ccid3hctx_x =
- max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv),
+ max(min(2 * hctx->ccid3hctx_x, min_rate),
scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
hctx->ccid3hctx_rtt));
hctx->ccid3hctx_t_ld = *now;
}
- if (hctx->ccid3hctx_x != old_x)
- ccid3_update_send_time(hctx);
+ if (hctx->ccid3hctx_x != old_x) {
+ ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
+ "X_recv=%u\n", (unsigned)(old_x >> 6),
+ (unsigned)(hctx->ccid3hctx_x >> 6),
+ hctx->ccid3hctx_x_calc,
+ (unsigned)(hctx->ccid3hctx_x_recv >> 6));
+
+ ccid3_update_send_interval(hctx);
+ }
}
/*
@@ -149,17 +181,12 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
*/
static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
{
- if (unlikely(len == 0))
- ccid3_pr_debug("Packet payload length is 0 - not updating\n");
- else
- hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len :
- (9 * hctx->ccid3hctx_s + len) / 10;
- /*
- * Note: We could do a potential optimisation here - when `s' changes,
- * recalculate sending rate and consequently t_ipi, t_delta, and
- * t_now. This is however non-standard, and the benefits are not
- * clear, so it is currently left out.
- */
+ const u16 old_s = hctx->ccid3hctx_s;
+
+ hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10;
+
+ if (hctx->ccid3hctx_s != old_s)
+ ccid3_update_send_interval(hctx);
}
/*
@@ -193,6 +220,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ struct timeval now;
unsigned long t_nfb = USEC_PER_SEC / 5;
bh_lock_sock(sk);
@@ -205,6 +233,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
ccid3_tx_state_name(hctx->ccid3hctx_state));
+ hctx->ccid3hctx_idle = 1;
+
switch (hctx->ccid3hctx_state) {
case TFRC_SSTATE_NO_FBACK:
/* RFC 3448, 4.4: Halve send rate directly */
@@ -219,53 +249,37 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
/* The value of R is still undefined and so we can not recompute
* the timout value. Keep initial value as per [RFC 4342, 5]. */
t_nfb = TFRC_INITIAL_TIMEOUT;
- ccid3_update_send_time(hctx);
+ ccid3_update_send_interval(hctx);
break;
case TFRC_SSTATE_FBACK:
/*
- * Check if IDLE since last timeout and recv rate is less than
- * 4 packets (in units of 64*bytes/sec) per RTT
+ * Modify the cached value of X_recv [RFC 3448, 4.4]
+ *
+ * If (p == 0 || X_calc > 2 * X_recv)
+ * X_recv = max(X_recv / 2, s / (2 * t_mbi));
+ * Else
+ * X_recv = X_calc / 4;
+ *
+ * Note that X_recv is scaled by 2^6 while X_calc is not
*/
- if (!hctx->ccid3hctx_idle ||
- (hctx->ccid3hctx_x_recv >= 4 *
- scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
- hctx->ccid3hctx_rtt))) {
- struct timeval now;
+ BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
- ccid3_pr_debug("%s(%p, state=%s), not idle\n",
- dccp_role(sk), sk,
- ccid3_tx_state_name(hctx->ccid3hctx_state));
+ if (hctx->ccid3hctx_p == 0 ||
+ (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {
- /*
- * Modify the cached value of X_recv [RFC 3448, 4.4]
- *
- * If (p == 0 || X_calc > 2 * X_recv)
- * X_recv = max(X_recv / 2, s / (2 * t_mbi));
- * Else
- * X_recv = X_calc / 4;
- *
- * Note that X_recv is scaled by 2^6 while X_calc is not
- */
- BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
-
- if (hctx->ccid3hctx_p == 0 ||
- (hctx->ccid3hctx_x_calc >
- (hctx->ccid3hctx_x_recv >> 5))) {
-
- hctx->ccid3hctx_x_recv =
- max(hctx->ccid3hctx_x_recv / 2,
- (((__u64)hctx->ccid3hctx_s) << 6) /
- (2 * TFRC_T_MBI));
-
- if (hctx->ccid3hctx_p == 0)
- dccp_timestamp(sk, &now);
- } else {
- hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
- hctx->ccid3hctx_x_recv <<= 4;
- }
- /* Now recalculate X [RFC 3448, 4.3, step (4)] */
- ccid3_hc_tx_update_x(sk, &now);
+ hctx->ccid3hctx_x_recv =
+ max(hctx->ccid3hctx_x_recv / 2,
+ (((__u64)hctx->ccid3hctx_s) << 6) /
+ (2 * TFRC_T_MBI));
+
+ if (hctx->ccid3hctx_p == 0)
+ dccp_timestamp(sk, &now);
+ } else {
+ hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
+ hctx->ccid3hctx_x_recv <<= 4;
}
+ /* Now recalculate X [RFC 3448, 4.3, step (4)] */
+ ccid3_hc_tx_update_x(sk, &now);
/*
* Schedule no feedback timer to expire in
* max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
@@ -280,8 +294,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
goto out;
}
- hctx->ccid3hctx_idle = 1;
-
restart_timer:
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
jiffies + usecs_to_jiffies(t_nfb));
@@ -322,24 +334,35 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
hctx->ccid3hctx_last_win_count = 0;
hctx->ccid3hctx_t_last_win_count = now;
- ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-
- /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
- ccid3_hc_tx_update_s(hctx, skb->len);
- hctx->ccid3hctx_x = hctx->ccid3hctx_s;
- hctx->ccid3hctx_x <<= 6;
-
- /* First timeout, according to [RFC 3448, 4.2], is 1 second */
- hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
- /* Initial delta: minimum of 0.5 sec and t_gran/2 */
- hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;
/* Set t_0 for initial packet */
hctx->ccid3hctx_t_nom = now;
+
+ hctx->ccid3hctx_s = skb->len;
+
+ /*
+ * Use initial RTT sample when available: recommended by erratum
+ * to RFC 4342. This implements the initialisation procedure of
+ * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6.
+ */
+ if (dp->dccps_syn_rtt) {
+ ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
+ hctx->ccid3hctx_rtt = dp->dccps_syn_rtt;
+ hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
+ hctx->ccid3hctx_t_ld = now;
+ } else {
+ /* Sender does not have RTT sample: X = MSS/second */
+ hctx->ccid3hctx_x = dp->dccps_mss_cache;
+ hctx->ccid3hctx_x <<= 6;
+ }
+ ccid3_update_send_interval(hctx);
+
+ ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
break;
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
+ ccid3_pr_debug("delay=%ld\n", (long)delay);
/*
* Scheduling of packet transmissions [RFC 3448, 4.6]
*
@@ -361,6 +384,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
/* prepare to send now (add options etc.) */
dp->dccps_hc_tx_insert_options = 1;
DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+ hctx->ccid3hctx_idle = 0;
/* set the nominal send time for the next following packet */
timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
@@ -391,7 +415,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss;
packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
packet->dccphtx_sent = 1;
- hctx->ccid3hctx_idle = 0;
}
static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -402,8 +425,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
struct dccp_tx_hist_entry *packet;
struct timeval now;
unsigned long t_nfb;
- u32 pinv;
- suseconds_t r_sample, t_elapsed;
+ u32 pinv, r_sample;
BUG_ON(hctx == NULL);
@@ -445,18 +467,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* Calculate new round trip sample as per [RFC 3448, 4.3] by
* R_sample = (now - t_recvdata) - t_elapsed
*/
- r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
- t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
-
- DCCP_BUG_ON(r_sample < 0);
- if (unlikely(r_sample <= t_elapsed))
- DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n",
- (int)r_sample, (int)t_elapsed);
- else
- r_sample -= t_elapsed;
- CCID3_RTT_SANITY_CHECK(r_sample);
+ r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp);
- /* Update RTT estimate by
+ /*
+ * Update RTT estimate by
* If (No feedback recv)
* R = R_sample;
* Else
@@ -467,27 +481,23 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
/*
* Larger Initial Windows [RFC 4342, sec. 5]
- * We deviate in that we use `s' instead of `MSS'.
*/
- __u64 w_init = min(4 * hctx->ccid3hctx_s,
- max(2 * hctx->ccid3hctx_s, 4380));
hctx->ccid3hctx_rtt = r_sample;
- hctx->ccid3hctx_x = scaled_div(w_init << 6, r_sample);
+ hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
hctx->ccid3hctx_t_ld = now;
- ccid3_update_send_time(hctx);
+ ccid3_update_send_interval(hctx);
- ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, "
- "R_sample=%dus, X=%u\n", dccp_role(sk),
+ ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
+ "R_sample=%uus, X=%u\n", dccp_role(sk),
sk, hctx->ccid3hctx_s,
- (unsigned long long)w_init,
- (int)r_sample,
+ dp->dccps_mss_cache, r_sample,
(unsigned)(hctx->ccid3hctx_x >> 6));
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
} else {
hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
- (u32)r_sample) / 10;
+ r_sample) / 10;
/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
if (hctx->ccid3hctx_p > 0)
@@ -497,10 +507,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
hctx->ccid3hctx_p);
ccid3_hc_tx_update_x(sk, &now);
- ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, "
+ ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
"p=%u, X_calc=%u, X_recv=%u, X=%u\n",
dccp_role(sk),
- sk, hctx->ccid3hctx_rtt, (int)r_sample,
+ sk, hctx->ccid3hctx_rtt, r_sample,
hctx->ccid3hctx_s, hctx->ccid3hctx_p,
hctx->ccid3hctx_x_calc,
(unsigned)(hctx->ccid3hctx_x_recv >> 6),
@@ -644,10 +654,50 @@ static void ccid3_hc_tx_exit(struct sock *sk)
dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
}
+static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
+{
+ const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+
+ /* Listen socks doesn't have a private CCID block */
+ if (sk->sk_state == DCCP_LISTEN)
+ return;
+
+ BUG_ON(hctx == NULL);
+
+ info->tcpi_rto = hctx->ccid3hctx_t_rto;
+ info->tcpi_rtt = hctx->ccid3hctx_rtt;
+}
+
+static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
+ u32 __user *optval, int __user *optlen)
+{
+ const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ const void *val;
+
+ /* Listen socks doesn't have a private CCID block */
+ if (sk->sk_state == DCCP_LISTEN)
+ return -EINVAL;
+
+ switch (optname) {
+ case DCCP_SOCKOPT_CCID_TX_INFO:
+ if (len < sizeof(hctx->ccid3hctx_tfrc))
+ return -EINVAL;
+ len = sizeof(hctx->ccid3hctx_tfrc);
+ val = &hctx->ccid3hctx_tfrc;
+ break;
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ if (put_user(len, optlen) || copy_to_user(optval, val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
/*
- * RX Half Connection methods
+ * Receiver Half-Connection Routines
*/
-
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
@@ -977,8 +1027,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
const struct dccp_options_received *opt_recv;
struct dccp_rx_hist_entry *packet;
struct timeval now;
- u32 p_prev, rtt_prev;
- suseconds_t r_sample, t_elapsed;
+ u32 p_prev, r_sample, rtt_prev;
int loss, payload_size;
BUG_ON(hcrx == NULL);
@@ -994,17 +1043,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
break;
rtt_prev = hcrx->ccid3hcrx_rtt;
dccp_timestamp(sk, &now);
- timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
- r_sample = timeval_usecs(&now);
- t_elapsed = opt_recv->dccpor_elapsed_time * 10;
-
- DCCP_BUG_ON(r_sample < 0);
- if (unlikely(r_sample <= t_elapsed))
- DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
- (long)r_sample, (long)t_elapsed);
- else
- r_sample -= t_elapsed;
- CCID3_RTT_SANITY_CHECK(r_sample);
+ r_sample = dccp_sample_rtt(sk, &now, NULL);
if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
hcrx->ccid3hcrx_rtt = r_sample;
@@ -1132,20 +1171,6 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
}
-static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
-{
- const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-
- /* Listen socks doesn't have a private CCID block */
- if (sk->sk_state == DCCP_LISTEN)
- return;
-
- BUG_ON(hctx == NULL);
-
- info->tcpi_rto = hctx->ccid3hctx_t_rto;
- info->tcpi_rtt = hctx->ccid3hctx_rtt;
-}
-
static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
u32 __user *optval, int __user *optlen)
{
@@ -1173,33 +1198,6 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
return 0;
}
-static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
- u32 __user *optval, int __user *optlen)
-{
- const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
- const void *val;
-
- /* Listen socks doesn't have a private CCID block */
- if (sk->sk_state == DCCP_LISTEN)
- return -EINVAL;
-
- switch (optname) {
- case DCCP_SOCKOPT_CCID_TX_INFO:
- if (len < sizeof(hctx->ccid3hctx_tfrc))
- return -EINVAL;
- len = sizeof(hctx->ccid3hctx_tfrc);
- val = &hctx->ccid3hctx_tfrc;
- break;
- default:
- return -ENOPROTOOPT;
- }
-
- if (put_user(len, optlen) || copy_to_user(optval, val, len))
- return -EFAULT;
-
- return 0;
-}
-
static struct ccid_operations ccid3 = {
.ccid_id = DCCPC_CCID3,
.ccid_name = "ccid3",
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 15776a88c090..8d31b389c19c 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -51,16 +51,6 @@
/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
#define TFRC_T_MBI 64
-/* What we think is a reasonable upper limit on RTT values */
-#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC))
-
-#define CCID3_RTT_SANITY_CHECK(rtt) do { \
- if (rtt > CCID3_SANE_RTT_MAX) { \
- DCCP_CRIT("RTT (%d) too large, substituting %d", \
- (int)rtt, (int)CCID3_SANE_RTT_MAX); \
- rtt = CCID3_SANE_RTT_MAX; \
- } } while (0)
-
enum ccid3_options {
TFRC_OPT_LOSS_EVENT_RATE = 192,
TFRC_OPT_LOSS_INTERVALS = 193,
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 0a0baef16b3e..372d7e75cdd8 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -91,7 +91,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
u32 w_tot = 0;
list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
- if (li_entry->dccplih_interval != ~0) {
+ if (li_entry->dccplih_interval != ~0U) {
i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
w_tot += dccp_li_hist_w[i];
if (i != 0)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e33a9edb4036..d8ad27bfe01a 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -31,13 +31,9 @@
__stringify(cond)); \
} while (0)
-#ifdef MODULE
#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \
printk(fmt, ##args); \
} while(0)
-#else
-#define DCCP_PRINTK(enable, fmt, args...) printk(fmt, ##args)
-#endif
#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \
"%s: " fmt, __FUNCTION__, ##a)
@@ -75,11 +71,15 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
/* RFC 1122, 4.2.3.1 initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
+#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+
+/* bounds for sampled RTT values from packet exchanges (in usec) */
+#define DCCP_SANE_RTT_MIN 100
+#define DCCP_SANE_RTT_MAX (4 * USEC_PER_SEC)
+
/* Maximal interval between probes for local resources. */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
-#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
-
/* sysctl variables for DCCP */
extern int sysctl_dccp_request_retries;
extern int sysctl_dccp_retries1;
@@ -92,17 +92,43 @@ extern int sysctl_dccp_feat_send_ack_vector;
extern int sysctl_dccp_feat_send_ndp_count;
extern int sysctl_dccp_tx_qlen;
+/*
+ * 48-bit sequence number arithmetic (signed and unsigned)
+ */
+#define INT48_MIN 0x800000000000LL /* 2^47 */
+#define UINT48_MAX 0xFFFFFFFFFFFFLL /* 2^48 - 1 */
+#define COMPLEMENT48(x) (0x1000000000000LL - (x)) /* 2^48 - x */
+#define TO_SIGNED48(x) (((x) < INT48_MIN)? (x) : -COMPLEMENT48( (x)))
+#define TO_UNSIGNED48(x) (((x) >= 0)? (x) : COMPLEMENT48(-(x)))
+#define ADD48(a, b) (((a) + (b)) & UINT48_MAX)
+#define SUB48(a, b) ADD48((a), COMPLEMENT48(b))
+
+static inline void dccp_set_seqno(u64 *seqno, u64 value)
+{
+ *seqno = value & UINT48_MAX;
+}
+
+static inline void dccp_inc_seqno(u64 *seqno)
+{
+ *seqno = ADD48(*seqno, 1);
+}
+
+/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */
+static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2)
+{
+ u64 delta = SUB48(seqno2, seqno1);
+
+ return TO_SIGNED48(delta);
+}
+
/* is seq1 < seq2 ? */
static inline int before48(const u64 seq1, const u64 seq2)
{
- return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
+ return (s64)((seq2 << 16) - (seq1 << 16)) > 0;
}
/* is seq1 > seq2 ? */
-static inline int after48(const u64 seq1, const u64 seq2)
-{
- return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
-}
+#define after48(seq1, seq2) before48(seq2, seq1)
/* is seq2 <= seq1 <= seq3 ? */
static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
@@ -118,9 +144,7 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
/* is seq1 next seqno after seq2 */
static inline int follows48(const u64 seq1, const u64 seq2)
{
- int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF);
-
- return diff==1;
+ return dccp_delta_seqno(seq2, seq1) == 1;
}
enum {
@@ -272,6 +296,8 @@ extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk, const int active);
extern int dccp_invalid_packet(struct sk_buff *skb);
+extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
+ struct timeval *t_history);
static inline int dccp_bad_service_code(const struct sock *sk,
const __be32 service)
@@ -313,26 +339,7 @@ static inline int dccp_packet_without_ack(const struct sk_buff *skb)
return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
}
-#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
-#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
-
-static inline void dccp_set_seqno(u64 *seqno, u64 value)
-{
- if (value > DCCP_MAX_SEQNO)
- value -= DCCP_MAX_SEQNO + 1;
- *seqno = value;
-}
-
-static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
-{
- return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
-}
-
-static inline void dccp_inc_seqno(u64 *seqno)
-{
- if (++*seqno > DCCP_MAX_SEQNO)
- *seqno = 0;
-}
+#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2)
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 78b043c458bf..da6ec185ed5b 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -86,7 +86,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
dh->dccph_type == DCCP_PKT_SYNCACK) {
if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
dp->dccps_awl, dp->dccps_awh) &&
- !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
+ dccp_delta_seqno(dp->dccps_swl,
+ DCCP_SKB_CB(skb)->dccpd_seq) >= 0)
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
else
return -1;
@@ -203,7 +204,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (dp->dccps_role != DCCP_ROLE_CLIENT)
goto send_sync;
check_seq:
- if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
+ if (dccp_delta_seqno(dp->dccps_osr,
+ DCCP_SKB_CB(skb)->dccpd_seq) >= 0) {
send_sync:
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_PKT_SYNC);
@@ -298,6 +300,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
if (dccp_parse_options(sk, skb))
goto out_invalid_packet;
+ /* Obtain RTT sample from SYN exchange (used by CCID 3) */
+ if (dp->dccps_options_received.dccpor_timestamp_echo) {
+ struct timeval now;
+
+ dccp_timestamp(sk, &now);
+ dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL);
+ }
+
if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
@@ -575,3 +585,43 @@ discard:
}
EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
+
+/**
+ * dccp_sample_rtt - Sample RTT from packet exchange
+ *
+ * @sk: connected dccp_sock
+ * @t_recv: receive timestamp of packet with timestamp echo
+ * @t_hist: packet history timestamp or NULL
+ */
+u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
+ struct timeval *t_hist)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_options_received *or = &dp->dccps_options_received;
+ suseconds_t delta;
+
+ if (t_hist == NULL) {
+ if (!or->dccpor_timestamp_echo) {
+ DCCP_WARN("packet without timestamp echo\n");
+ return DCCP_SANE_RTT_MAX;
+ }
+ timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10);
+ delta = timeval_usecs(t_recv);
+ } else
+ delta = timeval_delta(t_recv, t_hist);
+
+ delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */
+
+ if (unlikely(delta <= 0)) {
+ DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta);
+ return DCCP_SANE_RTT_MIN;
+ }
+ if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) {
+ DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta);
+ return DCCP_SANE_RTT_MAX;
+ }
+
+ return delta;
+}
+
+EXPORT_SYMBOL_GPL(dccp_sample_rtt);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4a83978aa660..718f2fa923a1 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -207,8 +207,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
(iph->ihl << 2));
struct dccp_sock *dp;
struct inet_sock *inet;
- const int type = skb->h.icmph->type;
- const int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct sock *sk;
__u64 seq;
int err;
@@ -363,8 +363,8 @@ EXPORT_SYMBOL_GPL(dccp_v4_send_check);
static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
{
- return secure_dccp_sequence_number(skb->nh.iph->daddr,
- skb->nh.iph->saddr,
+ return secure_dccp_sequence_number(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
dccp_hdr(skb)->dccph_dport,
dccp_hdr(skb)->dccph_sport);
}
@@ -405,7 +405,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->opt = ireq->opt;
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
- newinet->mc_ttl = skb->nh.iph->ttl;
+ newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->id = jiffies;
dccp_sync_mss(newsk, dst_mtu(dst));
@@ -428,7 +428,7 @@ EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
- const struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
struct sock *nsk;
struct request_sock **prev;
/* Find possible connection requests. */
@@ -460,8 +460,8 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
struct rtable *rt;
struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
.nl_u = { .ip4_u =
- { .daddr = skb->nh.iph->saddr,
- .saddr = skb->nh.iph->daddr,
+ { .daddr = ip_hdr(skb)->saddr,
+ .saddr = ip_hdr(skb)->daddr,
.tos = RT_CONN_FLAGS(sk) } },
.proto = sk->sk_protocol,
.uli_u = { .ports =
@@ -513,6 +513,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
int err;
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+ const struct iphdr *rxiph;
const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
@@ -559,13 +560,13 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
dccp_csum_outgoing(skb);
- dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr,
- rxskb->nh.iph->daddr);
+ rxiph = ip_hdr(rxskb);
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
+ rxiph->daddr);
bh_lock_sock(dccp_v4_ctl_socket->sk);
err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
- rxskb->nh.iph->daddr,
- rxskb->nh.iph->saddr, NULL);
+ rxiph->daddr, rxiph->saddr, NULL);
bh_unlock_sock(dccp_v4_ctl_socket->sk);
if (net_xmit_eval(err) == 0) {
@@ -640,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
ireq = inet_rsk(req);
- ireq->loc_addr = skb->nh.iph->daddr;
- ireq->rmt_addr = skb->nh.iph->saddr;
+ ireq->loc_addr = ip_hdr(skb)->daddr;
+ ireq->rmt_addr = ip_hdr(skb)->saddr;
ireq->opt = NULL;
/*
@@ -809,6 +810,7 @@ EXPORT_SYMBOL_GPL(dccp_invalid_packet);
static int dccp_v4_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
+ const struct iphdr *iph;
struct sock *sk;
int min_cov;
@@ -817,8 +819,9 @@ static int dccp_v4_rcv(struct sk_buff *skb)
if (dccp_invalid_packet(skb))
goto discard_it;
+ iph = ip_hdr(skb);
/* Step 1: If header checksum is incorrect, drop packet and return */
- if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) {
+ if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) {
DCCP_WARN("dropped packet with invalid checksum\n");
goto discard_it;
}
@@ -832,8 +835,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
"src=%u.%u.%u.%u@%-5d "
"dst=%u.%u.%u.%u@%-5d seq=%llu",
dccp_packet_name(dh->dccph_type),
- NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
- NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
+ NIPQUAD(iph->saddr), ntohs(dh->dccph_sport),
+ NIPQUAD(iph->daddr), ntohs(dh->dccph_dport),
(unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
if (dccp_packet_without_ack(skb)) {
@@ -848,10 +851,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
/* Step 2:
* Look up flow ID in table and get corresponding socket */
sk = __inet_lookup(&dccp_hashinfo,
- skb->nh.iph->saddr, dh->dccph_sport,
- skb->nh.iph->daddr, dh->dccph_dport,
- inet_iif(skb));
-
+ iph->saddr, dh->dccph_sport,
+ iph->daddr, dh->dccph_dport, inet_iif(skb));
/*
* Step 2:
* If no socket ...
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7f51e8db3967..64eac2515aa2 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -84,8 +84,8 @@ static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
{
- return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
- skb->nh.ipv6h->saddr.s6_addr32,
+ return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
dccp_hdr(skb)->dccph_dport,
dccp_hdr(skb)->dccph_sport );
@@ -261,8 +261,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
if (rxopt->srcrt)
opt = ipv6_invert_rthdr(sk,
- (struct ipv6_rt_hdr *)(pktopts->nh.raw +
- rxopt->srcrt));
+ (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+ rxopt->srcrt));
}
if (opt != NULL && opt->srcrt != NULL) {
@@ -313,6 +313,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+ struct ipv6hdr *rxip6h;
const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
@@ -352,12 +353,13 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
dccp_csum_outgoing(skb);
- dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr,
- &rxskb->nh.ipv6h->daddr);
+ rxip6h = ipv6_hdr(rxskb);
+ dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
+ &rxip6h->daddr);
memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
- ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
+ ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr);
fl.proto = IPPROTO_DCCP;
fl.oif = inet6_iif(rxskb);
@@ -390,7 +392,7 @@ static struct request_sock_ops dccp6_request_sock_ops = {
static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
- const struct ipv6hdr *iph = skb->nh.ipv6h;
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *nsk;
struct request_sock **prev;
/* Find possible connection requests. */
@@ -460,8 +462,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
ireq6 = inet6_rsk(req);
- ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
- ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
ireq6->pktopts = NULL;
if (ipv6_opt_accepted(sk, skb) ||
@@ -546,7 +548,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -573,8 +575,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
if (rxopt->srcrt)
opt = ipv6_invert_rthdr(sk,
- (struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw +
- rxopt->srcrt));
+ (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) +
+ rxopt->srcrt));
}
if (dst == NULL) {
@@ -653,7 +655,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
}
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
/*
* Clone native IPv6 options from listening socket (if any)
@@ -826,8 +828,8 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
goto discard_it;
/* Step 1: If header checksum is incorrect, drop packet and return. */
- if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr)) {
+ if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr)) {
DCCP_WARN("dropped packet with invalid checksum\n");
goto discard_it;
}
@@ -844,9 +846,9 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
/* Step 2:
* Look up flow ID in table and get corresponding socket */
- sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr,
+ sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr,
dh->dccph_sport,
- &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
+ &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
inet6_iif(skb));
/*
* Step 2:
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 6d235b3013dd..e18e249ac49b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -27,7 +27,7 @@
struct inet_timewait_death_row dccp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
.period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
- .death_lock = SPIN_LOCK_UNLOCKED,
+ .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock),
.hashinfo = &dccp_hashinfo,
.tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
(unsigned long)&dccp_death_row),
diff --git a/net/dccp/options.c b/net/dccp/options.c
index ca13f7731994..34d536d5f1a1 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -29,8 +29,6 @@ int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
-EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window);
-
void dccp_minisock_init(struct dccp_minisock *dmsk)
{
dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
@@ -174,21 +172,25 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
- "ackno=%llu, ", dccp_role(sk),
+ "ackno=%llu", dccp_role(sk),
opt_recv->dccpor_timestamp_echo,
len + 2,
(unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
- if (len == 4)
+ if (len == 4) {
+ dccp_pr_debug_cat("\n");
break;
+ }
if (len == 6)
elapsed_time = ntohs(*(__be16 *)(value + 4));
else
elapsed_time = ntohl(*(__be32 *)(value + 4));
+ dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time);
+
/* Give precedence to the biggest ELAPSED_TIME */
if (elapsed_time > opt_recv->dccpor_elapsed_time)
opt_recv->dccpor_elapsed_time = elapsed_time;
@@ -565,6 +567,14 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
dccp_insert_options_feat(sk, skb))
return -1;
+ /*
+ * Obtain RTT sample from Request/Response exchange.
+ * This is currently used in CCID 3 initialisation.
+ */
+ if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
+ dccp_insert_option_timestamp(sk, skb))
+ return -1;
+
/* XXX: insert other options when appropriate */
if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 3282f2f2291b..c8d843e983fc 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -194,6 +194,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb)
rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
if (rc <= 0)
break;
+ dccp_pr_debug("delayed send by %d msec\n", rc);
delay = msecs_to_jiffies(rc);
sk->sk_write_pending++;
release_sock(sk);
@@ -213,19 +214,6 @@ do_interrupted:
goto out;
}
-static void dccp_write_xmit_timer(unsigned long data) {
- struct sock *sk = (struct sock *)data;
- struct dccp_sock *dp = dccp_sk(sk);
-
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk))
- sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
- else
- dccp_write_xmit(sk, 0);
- bh_unlock_sock(sk);
- sock_put(sk);
-}
-
void dccp_write_xmit(struct sock *sk, int block)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -268,7 +256,7 @@ void dccp_write_xmit(struct sock *sk, int block)
DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
err);
} else {
- dccp_pr_debug("packet discarded\n");
+ dccp_pr_debug("packet discarded due to err=%d\n", err);
kfree_skb(skb);
}
}
@@ -434,9 +422,6 @@ static inline void dccp_connect_init(struct sock *sk)
dp->dccps_gar = dp->dccps_iss;
icsk->icsk_retransmits = 0;
- init_timer(&dp->dccps_xmit_timer);
- dp->dccps_xmit_timer.data = (unsigned long)sk;
- dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
}
int dccp_connect(struct sock *sk)
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 3b1f509f51dd..1f5e3ba62065 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -90,15 +90,18 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
if (port == 0 || ntohs(inet->dport) == port ||
ntohs(inet->sport) == port) {
if (hctx)
- printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n",
- NIPQUAD(inet->saddr), ntohs(inet->sport),
- NIPQUAD(inet->daddr), ntohs(inet->dport), size,
- hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
- hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi);
+ printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
+ "%llu %llu %d\n",
+ NIPQUAD(inet->saddr), ntohs(inet->sport),
+ NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+ hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
+ hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
+ hctx->ccid3hctx_x_recv >> 6,
+ hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
else
printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
- NIPQUAD(inet->saddr), ntohs(inet->sport),
- NIPQUAD(inet->daddr), ntohs(inet->dport), size);
+ NIPQUAD(inet->saddr), ntohs(inet->sport),
+ NIPQUAD(inet->daddr), ntohs(inet->dport), size);
}
jprobe_return();
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index cf28c53a389a..6607b7b14f34 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -575,7 +575,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- if (len < sizeof(int))
+ if (len < (int)sizeof(int))
return -EINVAL;
dp = dccp_sk(sk);
@@ -589,9 +589,11 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
(__be32 __user *)optval, optlen);
case DCCP_SOCKOPT_SEND_CSCOV:
val = dp->dccps_pcslen;
+ len = sizeof(val);
break;
case DCCP_SOCKOPT_RECV_CSCOV:
val = dp->dccps_pcrlen;
+ len = sizeof(val);
break;
case 128 ... 191:
return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 41ea0f6594c4..0197a41c256a 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -261,8 +261,33 @@ out:
sock_put(sk);
}
+/* Transmit-delay timer: used by the CCIDs to delay actual send time */
+static void dccp_write_xmit_timer(unsigned long data)
+{
+ struct sock *sk = (struct sock *)data;
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk))
+ sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
+ else
+ dccp_write_xmit(sk, 0);
+ bh_unlock_sock(sk);
+ sock_put(sk);
+}
+
+static void dccp_init_write_xmit_timer(struct sock *sk)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ init_timer(&dp->dccps_xmit_timer);
+ dp->dccps_xmit_timer.data = (unsigned long)sk;
+ dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
+}
+
void dccp_init_xmit_timers(struct sock *sk)
{
+ dccp_init_write_xmit_timer(sk);
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
&dccp_keepalive_timer);
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index c6568d637e1a..9fbe87c93802 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -721,7 +721,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr;
- struct net_device *dev;
+ struct net_device *dev, *ldev;
int rv;
if (addr_len != sizeof(struct sockaddr_dn))
@@ -746,14 +746,17 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!(saddr->sdn_flags & SDF_WILD)) {
if (dn_ntohs(saddr->sdn_nodeaddrl)) {
read_lock(&dev_base_lock);
- for(dev = dev_base; dev; dev = dev->next) {
+ ldev = NULL;
+ for_each_netdev(dev) {
if (!dev->dn_ptr)
continue;
- if (dn_dev_islocal(dev, dn_saddr2dn(saddr)))
+ if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) {
+ ldev = dev;
break;
+ }
}
read_unlock(&dev_base_lock);
- if (dev == NULL)
+ if (ldev == NULL)
return -EADDRNOTAVAIL;
}
}
@@ -2413,6 +2416,7 @@ module_init(decnet_init);
static void __exit decnet_exit(void)
{
sock_unregister(AF_DECnet);
+ rtnl_unregister_all(PF_DECnet);
dev_remove_pack(&dn_dix_packet_type);
dn_unregister_sysctl();
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 060d725e2942..764a56a13e38 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -799,10 +799,10 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
skip_ndevs = cb->args[0];
skip_naddr = cb->args[1];
- read_lock(&dev_base_lock);
- for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < skip_ndevs)
- continue;
+ goto cont;
else if (idx > skip_ndevs) {
/* Only skip over addresses for first dev dumped
* in this iteration (idx == skip_ndevs) */
@@ -810,22 +810,22 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
}
if ((dn_db = dev->dn_ptr) == NULL)
- continue;
+ goto cont;
for (ifa = dn_db->ifa_list, dn_idx = 0; ifa;
ifa = ifa->ifa_next, dn_idx++) {
if (dn_idx < skip_naddr)
- continue;
+ goto cont;
if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWADDR,
NLM_F_MULTI) < 0)
goto done;
}
+cont:
+ idx++;
}
done:
- read_unlock(&dev_base_lock);
-
cb->args[0] = idx;
cb->args[1] = dn_idx;
@@ -913,7 +913,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
pktlen = (__le16 *)skb_push(skb,2);
*pktlen = dn_htons(skb->len - 2);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
}
@@ -1005,7 +1005,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
pktlen = (__le16 *)skb_push(skb, 2);
*pktlen = dn_htons(skb->len - 2);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
if (dn_am_i_a_router(dn, dn_db, ifa)) {
struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
@@ -1299,7 +1299,7 @@ void dn_dev_devices_off(void)
struct net_device *dev;
rtnl_lock();
- for(dev = dev_base; dev; dev = dev->next)
+ for_each_netdev(dev)
dn_dev_down(dev);
rtnl_unlock();
@@ -1310,7 +1310,7 @@ void dn_dev_devices_on(void)
struct net_device *dev;
rtnl_lock();
- for(dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if (dev->flags & IFF_UP)
dn_dev_up(dev);
}
@@ -1328,62 +1328,56 @@ int unregister_dnaddr_notifier(struct notifier_block *nb)
}
#ifdef CONFIG_PROC_FS
-static inline struct net_device *dn_dev_get_next(struct seq_file *seq, struct net_device *dev)
+static inline int is_dn_dev(struct net_device *dev)
{
- do {
- dev = dev->next;
- } while(dev && !dev->dn_ptr);
-
- return dev;
+ return dev->dn_ptr != NULL;
}
-static struct net_device *dn_dev_get_idx(struct seq_file *seq, loff_t pos)
+static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
{
+ int i;
struct net_device *dev;
- dev = dev_base;
- if (dev && !dev->dn_ptr)
- dev = dn_dev_get_next(seq, dev);
- if (pos) {
- while(dev && (dev = dn_dev_get_next(seq, dev)))
- --pos;
- }
- return dev;
-}
+ read_lock(&dev_base_lock);
-static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
-{
- if (*pos) {
- struct net_device *dev;
- read_lock(&dev_base_lock);
- dev = dn_dev_get_idx(seq, *pos - 1);
- if (dev == NULL)
- read_unlock(&dev_base_lock);
- return dev;
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ i = 1;
+ for_each_netdev(dev) {
+ if (!is_dn_dev(dev))
+ continue;
+
+ if (i++ == *pos)
+ return dev;
}
- return SEQ_START_TOKEN;
+
+ return NULL;
}
static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev = v;
- loff_t one = 1;
+ struct net_device *dev;
- if (v == SEQ_START_TOKEN) {
- dev = dn_dev_seq_start(seq, &one);
- } else {
- dev = dn_dev_get_next(seq, dev);
- if (dev == NULL)
- read_unlock(&dev_base_lock);
- }
++*pos;
- return dev;
+
+ dev = (struct net_device *)v;
+ if (v == SEQ_START_TOKEN)
+ dev = net_device_entry(&dev_base_head);
+
+ for_each_netdev_continue(dev) {
+ if (!is_dn_dev(dev))
+ continue;
+
+ return dev;
+ }
+
+ return NULL;
}
static void dn_dev_seq_stop(struct seq_file *seq, void *v)
{
- if (v && v != SEQ_START_TOKEN)
- read_unlock(&dev_base_lock);
+ read_unlock(&dev_base_lock);
}
static char *dn_type2asc(char type)
@@ -1447,24 +1441,6 @@ static const struct file_operations dn_dev_seq_fops = {
#endif /* CONFIG_PROC_FS */
-static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
- [RTM_NEWADDR - RTM_BASE] = { .doit = dn_nl_newaddr, },
- [RTM_DELADDR - RTM_BASE] = { .doit = dn_nl_deladdr, },
- [RTM_GETADDR - RTM_BASE] = { .dumpit = dn_nl_dump_ifaddr, },
-#ifdef CONFIG_DECNET_ROUTER
- [RTM_NEWROUTE - RTM_BASE] = { .doit = dn_fib_rtm_newroute, },
- [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, },
- [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute,
- .dumpit = dn_fib_dump, },
- [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, },
-#else
- [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute,
- .dumpit = dn_cache_dump, },
-#endif
-
-};
-
static int __initdata addr[2];
module_param_array(addr, int, NULL, 0444);
MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
@@ -1485,7 +1461,9 @@ void __init dn_dev_init(void)
dn_dev_devices_on();
- rtnetlink_links[PF_DECnet] = dnet_rtnetlink_table;
+ rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL);
+ rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL);
+ rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr);
proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops);
@@ -1500,8 +1478,6 @@ void __init dn_dev_init(void)
void __exit dn_dev_cleanup(void)
{
- rtnetlink_links[PF_DECnet] = NULL;
-
#ifdef CONFIG_SYSCTL
{
int i;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 3cbfddc98430..d2bc19d47950 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -63,7 +63,7 @@ static struct
{
int error;
u8 scope;
-} dn_fib_props[RTA_MAX+1] = {
+} dn_fib_props[RTN_MAX+1] = {
[RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE },
[RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE },
[RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST },
@@ -276,6 +276,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
struct dn_fib_info *ofi;
int nhs = 1;
+ if (r->rtm_type > RTN_MAX)
+ goto err_inval;
+
if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
goto err_inval;
@@ -501,7 +504,7 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
return 0;
}
-int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct dn_fib_table *tb;
struct rtattr **rta = arg;
@@ -517,7 +520,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return -ESRCH;
}
-int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct dn_fib_table *tb;
struct rtattr **rta = arg;
@@ -599,7 +602,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
/* Scan device list */
read_lock(&dev_base_lock);
- for(dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
dn_db = dev->dn_ptr;
if (dn_db == NULL)
continue;
@@ -745,11 +748,13 @@ void __exit dn_fib_cleanup(void)
void __init dn_fib_init(void)
{
-
dn_fib_table_init();
dn_fib_rules_init();
register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
+
+ rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL);
+ rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL);
}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index bf701cf5a386..4bf066c416e2 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -261,7 +261,7 @@ static int dn_long_output(struct sk_buff *skb)
lp->s_class = 0;
lp->pt = 0;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
}
@@ -300,7 +300,7 @@ static int dn_short_output(struct sk_buff *skb)
sp->srcnode = cb->src;
sp->forward = cb->hops & 0x3f;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
}
@@ -342,7 +342,7 @@ static int dn_phase3_output(struct sk_buff *skb)
sp->srcnode = cb->src & dn_htons(0x03ff);
sp->forward = cb->hops & 0x3f;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
}
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 9d20904f6f52..4074a6e5d0de 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -362,7 +362,8 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
u16 dlen = *skb->data;
if ((dlen <= 16) && (dlen <= skb->len)) {
scp->conndata_in.opt_optl = dn_htons(dlen);
- memcpy(scp->conndata_in.opt_data, skb->data + 1, dlen);
+ skb_copy_from_linear_data_offset(skb, 1,
+ scp->conndata_in.opt_data, dlen);
}
}
dn_nsp_send_link(sk, DN_NOCHANGE, 0);
@@ -406,7 +407,7 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
u16 dlen = *skb->data;
if ((dlen <= 16) && (dlen <= skb->len)) {
scp->discdata_in.opt_optl = dn_htons(dlen);
- memcpy(scp->discdata_in.opt_data, skb->data + 1, dlen);
+ skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
}
}
@@ -725,7 +726,7 @@ static int dn_nsp_rx_packet(struct sk_buff *skb)
if (!pskb_may_pull(skb, 2))
goto free_out;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
cb->nsp_flags = *ptr++;
if (decnet_debug_level & 2)
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 2d2cda82c7db..7404653880b0 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -79,7 +79,7 @@ static void dn_nsp_send(struct sk_buff *skb)
struct dst_entry *dst;
struct flowi fl;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
scp->stamp = jiffies;
dst = sk_dst_check(sk, 0);
@@ -681,8 +681,10 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
if (scp->peer.sdn_objnum)
type = 0;
- skb_put(skb, dn_sockaddr2username(&scp->peer, skb->tail, type));
- skb_put(skb, dn_sockaddr2username(&scp->addr, skb->tail, 2));
+ skb_put(skb, dn_sockaddr2username(&scp->peer,
+ skb_tail_pointer(skb), type));
+ skb_put(skb, dn_sockaddr2username(&scp->addr,
+ skb_tail_pointer(skb), 2));
menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
if (scp->peer.sdn_flags & SDF_PROXY)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index c1b5502f195b..a8bf106b7a61 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -77,6 +77,7 @@
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <asm/errno.h>
+#include <net/netlink.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
@@ -386,7 +387,7 @@ static int dn_return_short(struct sk_buff *skb)
__le16 tmp;
/* Add back headers */
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
return NET_RX_DROP;
@@ -425,7 +426,7 @@ static int dn_return_long(struct sk_buff *skb)
unsigned char tmp[ETH_ALEN];
/* Add back all headers */
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
return NET_RX_DROP;
@@ -504,7 +505,7 @@ static int dn_route_rx_long(struct sk_buff *skb)
goto drop_it;
skb_pull(skb, 20);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
/* Destination info */
ptr += 2;
@@ -542,7 +543,7 @@ static int dn_route_rx_short(struct sk_buff *skb)
goto drop_it;
skb_pull(skb, 5);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
cb->dst = *(__le16 *)ptr;
ptr += 2;
@@ -615,7 +616,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
flags = *skb->data;
}
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/*
* Weed out future version DECnet
@@ -885,7 +886,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
.iif = loopback_dev.ifindex,
.oif = oldflp->oif };
struct dn_route *rt = NULL;
- struct net_device *dev_out = NULL;
+ struct net_device *dev_out = NULL, *dev;
struct neighbour *neigh = NULL;
unsigned hash;
unsigned flags = 0;
@@ -924,15 +925,17 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
goto out;
}
read_lock(&dev_base_lock);
- for(dev_out = dev_base; dev_out; dev_out = dev_out->next) {
- if (!dev_out->dn_ptr)
+ for_each_netdev(dev) {
+ if (!dev->dn_ptr)
continue;
- if (!dn_dev_islocal(dev_out, oldflp->fld_src))
+ if (!dn_dev_islocal(dev, oldflp->fld_src))
continue;
- if ((dev_out->flags & IFF_LOOPBACK) &&
+ if ((dev->flags & IFF_LOOPBACK) &&
oldflp->fld_dst &&
- !dn_dev_islocal(dev_out, oldflp->fld_dst))
+ !dn_dev_islocal(dev, oldflp->fld_dst))
continue;
+
+ dev_out = dev;
break;
}
read_unlock(&dev_base_lock);
@@ -1468,7 +1471,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
struct dn_route *rt = (struct dn_route *)skb->dst;
struct rtmsg *r;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
long expires;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
@@ -1509,19 +1512,19 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (rt->fl.iif)
RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
/*
* This is called by both endnodes and routers now.
*/
-int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
struct rtattr **rta = arg;
struct rtmsg *rtm = NLMSG_DATA(nlh);
@@ -1537,7 +1540,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
return -ENOBUFS;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
cb = DN_SKB_CB(skb);
if (rta[RTA_SRC-1])
@@ -1812,6 +1815,13 @@ void __init dn_route_init(void)
dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
+
+#ifdef CONFIG_DECNET_ROUTER
+ rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump);
+#else
+ rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
+ dn_cache_dump);
+#endif
}
void __exit dn_route_cleanup(void)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index b6c98ac93dc8..17a1932216d6 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -31,6 +31,7 @@
#include <net/dn_fib.h>
#include <net/dn_neigh.h>
#include <net/dn_dev.h>
+#include <net/dn_route.h>
static struct fib_rules_ops dn_fib_rules_ops;
@@ -109,8 +110,6 @@ errout:
static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
FRA_GENERIC_POLICY,
- [FRA_SRC] = { .type = NLA_U16 },
- [FRA_DST] = { .type = NLA_U16 },
};
static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
@@ -133,7 +132,7 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
- if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos)
+ if (frh->tos)
goto errout;
if (rule->table == RT_TABLE_UNSPEC) {
@@ -150,10 +149,10 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
}
- if (tb[FRA_SRC])
+ if (frh->src_len)
r->src = nla_get_le16(tb[FRA_SRC]);
- if (tb[FRA_DST])
+ if (frh->dst_len)
r->dst = nla_get_le16(tb[FRA_DST]);
r->src_len = frh->src_len;
@@ -176,10 +175,10 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (r->dst_len != frh->dst_len))
return 0;
- if (tb[FRA_SRC] && (r->src != nla_get_le16(tb[FRA_SRC])))
+ if (frh->src_len && (r->src != nla_get_le16(tb[FRA_SRC])))
return 0;
- if (tb[FRA_DST] && (r->dst != nla_get_le16(tb[FRA_DST])))
+ if (frh->dst_len && (r->dst != nla_get_le16(tb[FRA_DST])))
return 0;
return 1;
@@ -241,20 +240,22 @@ static u32 dn_fib_rule_default_pref(void)
return 0;
}
-int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static void dn_fib_rule_flush_cache(void)
{
- return fib_rules_dump(skb, cb, AF_DECnet);
+ dn_rt_cache_flush(-1);
}
static struct fib_rules_ops dn_fib_rules_ops = {
.family = AF_DECnet,
.rule_size = sizeof(struct dn_fib_rule),
+ .addr_size = sizeof(u16),
.action = dn_fib_rule_action,
.match = dn_fib_rule_match,
.configure = dn_fib_rule_configure,
.compare = dn_fib_rule_compare,
.fill = dn_fib_rule_fill,
.default_pref = dn_fib_rule_default_pref,
+ .flush_cache = dn_fib_rule_flush_cache,
.nlgroup = RTNLGRP_DECnet_RULE,
.policy = dn_fib_rule_policy,
.rules_list = &dn_fib_rules,
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 780a141f8342..d6615c9361e9 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -28,6 +28,7 @@
#include <asm/uaccess.h>
#include <linux/route.h> /* RTF_xxx */
#include <net/neighbour.h>
+#include <net/netlink.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/fib_rules.h>
@@ -295,7 +296,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
rtm = NLMSG_DATA(nlh);
@@ -337,19 +338,19 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
nhp->rtnh_ifindex = nh->nh_oif;
if (nh->nh_gw)
RTA_PUT(skb, RTA_GATEWAY, 2, &nh->nh_gw);
- nhp->rtnh_len = skb->tail - (unsigned char *)nhp;
+ nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp;
} endfor_nexthops(fi);
mp_head->rta_type = RTA_MULTIPATH;
- mp_head->rta_len = skb->tail - (u8*)mp_head;
+ mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
}
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -EMSGSIZE;
}
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 0e62def05a58..696234688cf6 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -33,7 +33,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
{
struct sk_buff *skb = NULL;
size_t size;
- unsigned char *old_tail;
+ sk_buff_data_t old_tail;
struct nlmsghdr *nlh;
unsigned char *ptr;
struct nf_dn_rtmsg *rtm;
@@ -48,7 +48,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh);
rtm->nfdn_ifindex = rt_skb->dev->ifindex;
ptr = NFDN_RTMSG(rtm);
- memcpy(ptr, rt_skb->data, rt_skb->len);
+ skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
nlh->nlmsg_len = skb->tail - old_tail;
return skb;
@@ -102,7 +102,7 @@ static unsigned int dnrmg_hook(unsigned int hook,
static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
{
- struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return;
@@ -138,7 +138,7 @@ static int __init dn_rtmsg_init(void)
int rv = 0;
dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
- dnrmg_receive_user_sk, THIS_MODULE);
+ dnrmg_receive_user_sk, NULL, THIS_MODULE);
if (dnrmg == NULL) {
printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
return -ENOMEM;
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index bc12e36263f0..b5524f32ac2d 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -162,7 +162,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
if (err)
goto out_free;
- skb_get_timestamp(skb, &sk->sk_stamp);
+ sk->sk_stamp = skb->tstamp;
if (msg->msg_name)
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
@@ -345,7 +345,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out_unlock;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
eb = (struct ec_cb *)&skb->cb;
@@ -366,7 +366,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
fh->cb = cb;
fh->port = port;
if (sock->type != SOCK_DGRAM) {
- skb->tail = skb->data;
+ skb_reset_tail_pointer(skb);
skb->len = 0;
} else if (res < 0)
goto out_free;
@@ -727,6 +727,9 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg
case SIOCGSTAMP:
return sock_get_timestamp(sk, argp);
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, argp);
+
case SIOCSIFADDR:
case SIOCGIFADDR:
return ec_dev_ioctl(sock, cmd, argp);
@@ -845,7 +848,7 @@ static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb)
static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
{
- struct iphdr *ip = skb->nh.iph;
+ struct iphdr *ip = ip_hdr(skb);
unsigned char stn = ntohl(ip->saddr) & 0xff;
struct sock *sk;
struct sk_buff *newskb;
@@ -940,10 +943,10 @@ static void aun_data_available(struct sock *sk, int slen)
printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err);
}
- data = skb->h.raw + sizeof(struct udphdr);
+ data = skb_transport_header(skb) + sizeof(struct udphdr);
ah = (struct aunhdr *)data;
len = skb->len - sizeof(struct udphdr);
- ip = skb->nh.iph;
+ ip = ip_hdr(skb);
switch (ah->code)
{
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 7391f55904d1..0ac2524f3b68 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -156,7 +156,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
struct ethhdr *eth;
unsigned char *rawp;
- skb->mac.raw = skb->data;
+ skb->dev = dev;
+ skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
eth = eth_hdr(skb);
@@ -228,7 +229,7 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
eth = (struct ethhdr *)
(((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
- if (type == __constant_htons(ETH_P_802_3))
+ if (type == htons(ETH_P_802_3))
return -1;
eth->h_proto = type;
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index a64be6cdf078..1438adedbc83 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -38,7 +38,7 @@ config IEEE80211_CRYPT_WEP
Include software based cipher suites in support of IEEE
802.11's WEP. This is needed for WEP as well as 802.1x.
- This can be compiled as a modules and it will be called
+ This can be compiled as a module and it will be called
"ieee80211_crypt_wep".
config IEEE80211_CRYPT_CCMP
@@ -51,12 +51,13 @@ config IEEE80211_CRYPT_CCMP
(aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with CCMP enabled
networks.
- This can be compiled as a modules and it will be called
+ This can be compiled as a module and it will be called
"ieee80211_crypt_ccmp".
config IEEE80211_CRYPT_TKIP
tristate "IEEE 802.11i TKIP encryption"
- depends on IEEE80211 && NET_RADIO
+ depends on IEEE80211
+ select WIRELESS_EXT
select CRYPTO
select CRYPTO_MICHAEL_MIC
select CRYPTO_ECB
@@ -66,7 +67,7 @@ config IEEE80211_CRYPT_TKIP
(aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with TKIP enabled
networks.
- This can be compiled as a modules and it will be called
+ This can be compiled as a module and it will be called
"ieee80211_crypt_tkip".
source "net/ieee80211/softmac/Kconfig"
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c
index 5ed0a98b2d76..df5592c9339f 100644
--- a/net/ieee80211/ieee80211_crypt.c
+++ b/net/ieee80211/ieee80211_crypt.c
@@ -1,7 +1,7 @@
/*
* Host AP crypto routines
*
- * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
* Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index 35aa3426c3fa..b016b4104de6 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -1,7 +1,7 @@
/*
* Host AP crypt: host-based CCMP encryption implementation for Host AP driver
*
- * Copyright (c) 2003-2004, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -338,7 +338,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
if (ccmp_replay_check(pn, key->rx_pn)) {
if (net_ratelimit()) {
- printk(KERN_DEBUG "CCMP: replay detected: STA=" MAC_FMT
+ IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=" MAC_FMT
" previous PN %02x%02x%02x%02x%02x%02x "
"received PN %02x%02x%02x%02x%02x%02x\n",
MAC_ARG(hdr->addr2), MAC_ARG(key->rx_pn),
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index fc1f99a59732..5a48d8e0aec1 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -1,7 +1,7 @@
/*
* Host AP crypt: host-based TKIP encryption implementation for Host AP driver
*
- * Copyright (c) 2003-2004, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -465,7 +465,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
if (net_ratelimit()) {
- printk(KERN_DEBUG "TKIP: replay detected: STA=" MAC_FMT
+ IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=" MAC_FMT
" previous TSC %08x%04x received TSC "
"%08x%04x\n", MAC_ARG(hdr->addr2),
tkey->rx_iv32, tkey->rx_iv16, iv32, iv16);
@@ -507,7 +507,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
tkey->rx_phase1_done = 0;
}
if (net_ratelimit()) {
- printk(KERN_DEBUG "TKIP: ICV error detected: STA="
+ IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA="
MAC_FMT "\n", MAC_ARG(hdr->addr2));
}
tkey->dot11RSNAStatsTKIPICVErrors++;
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index ec6d8851a061..8d182459344e 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -1,7 +1,7 @@
/*
* Host AP crypt: host-based WEP encryption implementation for Host AP driver
*
- * Copyright (c) 2002-2004, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -152,7 +152,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
return -1;
/* Copy the IV into the first 3 bytes of the key */
- memcpy(key, skb->data + hdr_len, 3);
+ skb_copy_from_linear_data_offset(skb, hdr_len, key, 3);
/* Copy rest of the WEP key (the secret part) */
memcpy(key + 3, wep->key, wep->key_len);
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index b1c6d1f717d9..7ec6610841ba 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -5,8 +5,8 @@
Portions of this file are based on the WEP enablement code provided by the
Host AP project hostap-drivers v0.1.3
Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
- <jkmaline@cc.hut.fi>
- Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ <j@w1.fi>
+ Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
This program is free software; you can redistribute it and/or modify it
under the terms of version 2 of the GNU General Public License as
@@ -229,6 +229,7 @@ void free_ieee80211(struct net_device *dev)
static int debug = 0;
u32 ieee80211_debug_level = 0;
+EXPORT_SYMBOL_GPL(ieee80211_debug_level);
static struct proc_dir_entry *ieee80211_proc = NULL;
static int show_debug_level(char *page, char **start, off_t offset,
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 4084909f6f92..f2de2e48b021 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -3,8 +3,8 @@
* for Intersil Prism2/2.5/3 - hostap.o module, common routines
*
* Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
- * <jkmaline@cc.hut.fi>
- * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * <j@w1.fi>
+ * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
* Copyright (c) 2004-2005, Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
u16 fc = le16_to_cpu(hdr->frame_ctl);
skb->dev = ieee->dev;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb_pull(skb, ieee80211_get_hdrlen(fc));
skb->pkt_type = PACKET_OTHERHOST;
skb->protocol = __constant_htons(ETH_P_80211_RAW);
@@ -606,12 +606,12 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
if (frag == 0) {
/* copy first fragment (including full headers) into
* beginning of the fragment cache skb */
- memcpy(skb_put(frag_skb, flen), skb->data, flen);
+ skb_copy_from_linear_data(skb, skb_put(frag_skb, flen), flen);
} else {
/* append frame payload to the end of the fragment
* cache skb */
- memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
- flen);
+ skb_copy_from_linear_data_offset(skb, hdrlen,
+ skb_put(frag_skb, flen), flen);
}
dev_kfree_skb_any(skb);
skb = NULL;
@@ -759,8 +759,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
IEEE80211_FCTL_TODS) && skb->len >= ETH_HLEN + ETH_ALEN) {
/* Non-standard frame: get addr4 from its bogus location after
* the payload */
- memcpy(skb->data + ETH_ALEN,
- skb->data + skb->len - ETH_ALEN, ETH_ALEN);
+ skb_copy_to_linear_data_offset(skb, ETH_ALEN,
+ skb->data + skb->len - ETH_ALEN,
+ ETH_ALEN);
skb_trim(skb, skb->len - ETH_ALEN);
}
#endif
@@ -789,10 +790,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
if (skb2 != NULL) {
/* send to wireless media */
- skb2->protocol = __constant_htons(ETH_P_802_3);
- skb2->mac.raw = skb2->nh.raw = skb2->data;
- /* skb2->nh.raw = skb2->data + ETH_HLEN; */
skb2->dev = dev;
+ skb2->protocol = __constant_htons(ETH_P_802_3);
+ skb_reset_mac_header(skb2);
+ skb_reset_network_header(skb2);
+ /* skb2->network_header += ETH_HLEN; */
dev_queue_xmit(skb2);
}
#endif
@@ -800,7 +802,6 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
if (skb) {
skb->protocol = eth_type_trans(skb, dev);
memset(skb->cb, 0, sizeof(skb->cb));
- skb->dev = dev;
skb->ip_summed = CHECKSUM_NONE; /* 802.11 crc not sufficient */
if (netif_rx(skb) == NET_RX_DROP) {
/* netif_rx always succeeds, but it might drop
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 0292d6348e12..a4c3c51140a3 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -225,10 +225,10 @@ static int ieee80211_classify(struct sk_buff *skb)
struct iphdr *ip;
eth = (struct ethhdr *)skb->data;
- if (eth->h_proto != __constant_htons(ETH_P_IP))
+ if (eth->h_proto != htons(ETH_P_IP))
return 0;
- ip = skb->nh.iph;
+ ip = ip_hdr(skb);
switch (ip->tos & 0xfc) {
case 0x20:
return 2;
@@ -309,8 +309,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
}
/* Save source and destination addresses */
- memcpy(dest, skb->data, ETH_ALEN);
- memcpy(src, skb->data + ETH_ALEN, ETH_ALEN);
+ skb_copy_from_linear_data(skb, dest, ETH_ALEN);
+ skb_copy_from_linear_data_offset(skb, ETH_ALEN, src, ETH_ALEN);
if (host_encrypt || host_build_iv)
fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
@@ -363,7 +363,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
snapped = 1;
ieee80211_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)),
ether_type);
- memcpy(skb_put(skb_new, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, skb_put(skb_new, skb->len), skb->len);
res = crypt->ops->encrypt_msdu(skb_new, hdr_len, crypt->priv);
if (res < 0) {
IEEE80211_ERROR("msdu encryption failed\n");
@@ -492,7 +492,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
bytes -= SNAP_SIZE + sizeof(u16);
}
- memcpy(skb_put(skb_frag, bytes), skb->data, bytes);
+ skb_copy_from_linear_data(skb, skb_put(skb_frag, bytes), bytes);
/* Advance the SKB... */
skb_pull(skb, bytes);
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 40d7a55fe03e..cee5e13bc427 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -5,8 +5,8 @@
Portions of this file are based on the WEP enablement code provided by the
Host AP project hostap-drivers v0.1.3
Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
- <jkmaline@cc.hut.fi>
- Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ <j@w1.fi>
+ Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
This program is free software; you can redistribute it and/or modify it
under the terms of version 2 of the GNU General Public License as
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9e8ef509c51d..e62aee0ec4c5 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -574,6 +574,33 @@ config TCP_CONG_VENO
loss packets.
See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
+config TCP_CONG_YEAH
+ tristate "YeAH TCP"
+ depends on EXPERIMENTAL
+ default n
+ ---help---
+ YeAH-TCP is a sender-side high-speed enabled TCP congestion control
+ algorithm, which uses a mixed loss/delay approach to compute the
+ congestion window. Its design goals target high efficiency,
+ internal, RTT and Reno fairness, resilience to link loss while
+ keeping network elements load as low as possible.
+
+ For further details look here:
+ http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+
+config TCP_CONG_ILLINOIS
+ tristate "TCP Illinois"
+ depends on EXPERIMENTAL
+ default n
+ ---help---
+ TCP-Illinois is a sender-side modification of TCP Reno for
+ high speed long delay links. It uses round-trip-time to
+ adjust the alpha and beta parameters to achieve a higher average
+ throughput and maintain fairness.
+
+ For further details see:
+ http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+
choice
prompt "Default TCP congestion control"
default DEFAULT_CUBIC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7a068626feea..4ff6c151d7f3 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
+obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cf358c84c440..041fba3fa0aa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -87,11 +87,11 @@
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include <asm/system.h>
-#include <linux/smp_lock.h>
#include <linux/inet.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
@@ -217,6 +217,26 @@ out:
return err;
}
+u32 inet_ehash_secret __read_mostly;
+EXPORT_SYMBOL(inet_ehash_secret);
+
+/*
+ * inet_ehash_secret must be set exactly once
+ * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
+ */
+void build_ehash_secret(void)
+{
+ u32 rnd;
+ do {
+ get_random_bytes(&rnd, sizeof(rnd));
+ } while (rnd == 0);
+ spin_lock_bh(&inetsw_lock);
+ if (!inet_ehash_secret)
+ inet_ehash_secret = rnd;
+ spin_unlock_bh(&inetsw_lock);
+}
+EXPORT_SYMBOL(build_ehash_secret);
+
/*
* Create an inet socket.
*/
@@ -233,6 +253,11 @@ static int inet_create(struct socket *sock, int protocol)
int try_loading_module = 0;
int err;
+ if (sock->type != SOCK_RAW &&
+ sock->type != SOCK_DGRAM &&
+ !inet_ehash_secret)
+ build_ehash_secret();
+
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
@@ -755,6 +780,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP:
err = sock_get_timestamp(sk, (struct timeval __user *)arg);
break;
+ case SIOCGSTAMPNS:
+ err = sock_get_timestampns(sk, (struct timespec __user *)arg);
+ break;
case SIOCADDRT:
case SIOCDELRT:
case SIOCRTMSG:
@@ -1109,7 +1137,7 @@ static int inet_gso_send_check(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
goto out;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
ihl = iph->ihl * 4;
if (ihl < sizeof(*iph))
goto out;
@@ -1117,8 +1145,9 @@ static int inet_gso_send_check(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, ihl)))
goto out;
- skb->h.raw = __skb_pull(skb, ihl);
- iph = skb->nh.iph;
+ __skb_pull(skb, ihl);
+ skb_reset_transport_header(skb);
+ iph = ip_hdr(skb);
proto = iph->protocol & (MAX_INET_PROTOS - 1);
err = -EPROTONOSUPPORT;
@@ -1152,7 +1181,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
goto out;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
ihl = iph->ihl * 4;
if (ihl < sizeof(*iph))
goto out;
@@ -1160,8 +1189,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
if (unlikely(!pskb_may_pull(skb, ihl)))
goto out;
- skb->h.raw = __skb_pull(skb, ihl);
- iph = skb->nh.iph;
+ __skb_pull(skb, ihl);
+ skb_reset_transport_header(skb);
+ iph = ip_hdr(skb);
id = ntohs(iph->id);
proto = iph->protocol & (MAX_INET_PROTOS - 1);
segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -1177,17 +1207,57 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
skb = segs;
do {
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->id = htons(id++);
iph->tot_len = htons(skb->len - skb->mac_len);
iph->check = 0;
- iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+ iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
} while ((skb = skb->next));
out:
return segs;
}
+unsigned long snmp_fold_field(void *mib[], int offt)
+{
+ unsigned long res = 0;
+ int i;
+
+ for_each_possible_cpu(i) {
+ res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
+ res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+ }
+ return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field);
+
+int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+{
+ BUG_ON(ptr == NULL);
+ ptr[0] = __alloc_percpu(mibsize);
+ if (!ptr[0])
+ goto err0;
+ ptr[1] = __alloc_percpu(mibsize);
+ if (!ptr[1])
+ goto err1;
+ return 0;
+err1:
+ free_percpu(ptr[0]);
+ ptr[0] = NULL;
+err0:
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_init);
+
+void snmp_mib_free(void *ptr[2])
+{
+ BUG_ON(ptr == NULL);
+ free_percpu(ptr[0]);
+ free_percpu(ptr[1]);
+ ptr[0] = ptr[1] = NULL;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_free);
+
#ifdef CONFIG_IP_MULTICAST
static struct net_protocol igmp_protocol = {
.handler = igmp_rcv,
@@ -1214,28 +1284,47 @@ static struct net_protocol icmp_protocol = {
static int __init init_ipv4_mibs(void)
{
- net_statistics[0] = alloc_percpu(struct linux_mib);
- net_statistics[1] = alloc_percpu(struct linux_mib);
- ip_statistics[0] = alloc_percpu(struct ipstats_mib);
- ip_statistics[1] = alloc_percpu(struct ipstats_mib);
- icmp_statistics[0] = alloc_percpu(struct icmp_mib);
- icmp_statistics[1] = alloc_percpu(struct icmp_mib);
- tcp_statistics[0] = alloc_percpu(struct tcp_mib);
- tcp_statistics[1] = alloc_percpu(struct tcp_mib);
- udp_statistics[0] = alloc_percpu(struct udp_mib);
- udp_statistics[1] = alloc_percpu(struct udp_mib);
- udplite_statistics[0] = alloc_percpu(struct udp_mib);
- udplite_statistics[1] = alloc_percpu(struct udp_mib);
- if (!
- (net_statistics[0] && net_statistics[1] && ip_statistics[0]
- && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
- && udp_statistics[0] && udp_statistics[1]
- && udplite_statistics[0] && udplite_statistics[1] ) )
- return -ENOMEM;
-
- (void) tcp_mib_init();
+ if (snmp_mib_init((void **)net_statistics,
+ sizeof(struct linux_mib),
+ __alignof__(struct linux_mib)) < 0)
+ goto err_net_mib;
+ if (snmp_mib_init((void **)ip_statistics,
+ sizeof(struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
+ goto err_ip_mib;
+ if (snmp_mib_init((void **)icmp_statistics,
+ sizeof(struct icmp_mib),
+ __alignof__(struct icmp_mib)) < 0)
+ goto err_icmp_mib;
+ if (snmp_mib_init((void **)tcp_statistics,
+ sizeof(struct tcp_mib),
+ __alignof__(struct tcp_mib)) < 0)
+ goto err_tcp_mib;
+ if (snmp_mib_init((void **)udp_statistics,
+ sizeof(struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
+ goto err_udp_mib;
+ if (snmp_mib_init((void **)udplite_statistics,
+ sizeof(struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
+ goto err_udplite_mib;
+
+ tcp_mib_init();
return 0;
+
+err_udplite_mib:
+ snmp_mib_free((void **)udp_statistics);
+err_udp_mib:
+ snmp_mib_free((void **)tcp_statistics);
+err_tcp_mib:
+ snmp_mib_free((void **)icmp_statistics);
+err_icmp_mib:
+ snmp_mib_free((void **)ip_statistics);
+err_ip_mib:
+ snmp_mib_free((void **)net_statistics);
+err_net_mib:
+ return -ENOMEM;
}
static int ipv4_proc_init(void);
@@ -1336,7 +1425,7 @@ static int __init inet_init(void)
* Initialise per-cpu ipv4 mibs
*/
- if(init_ipv4_mibs())
+ if (init_ipv4_mibs())
printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
ipv4_proc_init();
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7194eb40b6d0..6da8ff597ad3 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -65,7 +65,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
char buf[60];
} tmp_iph;
- top_iph = skb->nh.iph;
+ top_iph = ip_hdr(skb);
iph = &tmp_iph.iph;
iph->tos = top_iph->tos;
@@ -152,9 +152,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
ah = (struct ip_auth_hdr*)skb->data;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
- ihl = skb->data - skb->nh.raw;
+ ihl = skb->data - skb_network_header(skb);
memcpy(work_buf, iph, ihl);
iph->ttl = 0;
@@ -181,7 +181,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
}
}
((struct iphdr*)work_buf)->protocol = ah->nexthdr;
- skb->h.raw = memcpy(skb->nh.raw += ah_hlen, work_buf, ihl);
+ skb->network_header += ah_hlen;
+ memcpy(skb_network_header(skb), work_buf, ihl);
+ skb->transport_header = skb->network_header;
__skb_pull(skb, ah_hlen + ihl);
return 0;
@@ -196,8 +198,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
- if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
- skb->h.icmph->code != ICMP_FRAG_NEEDED)
+ if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1a3488a83f49..7110779a0244 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -342,13 +342,13 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
default:
case 0: /* By default announce any local IP */
- if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL)
- saddr = skb->nh.iph->saddr;
+ if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
+ saddr = ip_hdr(skb)->saddr;
break;
case 1: /* Restrict announcements of saddr in same subnet */
if (!skb)
break;
- saddr = skb->nh.iph->saddr;
+ saddr = ip_hdr(skb)->saddr;
if (inet_addr_type(saddr) == RTN_LOCAL) {
/* saddr should be known to target */
if (inet_addr_onlink(in_dev, target, saddr))
@@ -578,7 +578,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
return NULL;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
skb->dev = dev;
skb->protocol = htons(ETH_P_ARP);
@@ -721,7 +721,7 @@ static int arp_process(struct sk_buff *skb)
if (in_dev == NULL)
goto out;
- arp = skb->nh.arph;
+ arp = arp_hdr(skb);
switch (dev_type) {
default:
@@ -937,7 +937,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
(2 * sizeof(u32)))))
goto freeskb;
- arp = skb->nh.arph;
+ arp = arp_hdr(skb);
if (arp->ar_hln != dev->addr_len ||
dev->flags & IFF_NOARP ||
skb->pkt_type == PACKET_OTHERHOST ||
@@ -1178,7 +1178,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
goto out;
}
- switch(cmd) {
+ switch (cmd) {
case SIOCDARP:
err = arp_req_delete(&r, dev);
break;
@@ -1360,7 +1360,7 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
/* ------------------------------------------------------------------------ */
-static struct seq_operations arp_seq_ops = {
+static const struct seq_operations arp_seq_ops = {
.start = arp_seq_start,
.next = neigh_seq_next,
.stop = neigh_seq_stop,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c976dd7e9758..e1f18489db1d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -92,6 +92,33 @@ int cipso_v4_rbm_optfmt = 0;
int cipso_v4_rbm_strictvalid = 1;
/*
+ * Protocol Constants
+ */
+
+/* Maximum size of the CIPSO IP option, derived from the fact that the maximum
+ * IPv4 header size is 60 bytes and the base IPv4 header is 20 bytes long. */
+#define CIPSO_V4_OPT_LEN_MAX 40
+
+/* Length of the base CIPSO option, this includes the option type (1 byte), the
+ * option length (1 byte), and the DOI (4 bytes). */
+#define CIPSO_V4_HDR_LEN 6
+
+/* Base length of the restrictive category bitmap tag (tag #1). */
+#define CIPSO_V4_TAG_RBM_BLEN 4
+
+/* Base length of the enumerated category tag (tag #2). */
+#define CIPSO_V4_TAG_ENUM_BLEN 4
+
+/* Base length of the ranged categories bitmap tag (tag #5). */
+#define CIPSO_V4_TAG_RNG_BLEN 4
+/* The maximum number of category ranges permitted in the ranged category tag
+ * (tag #5). You may note that the IETF draft states that the maximum number
+ * of category ranges is 7, but if the low end of the last category range is
+ * zero then it is possible to fit 8 category ranges because the zero should
+ * be omitted. */
+#define CIPSO_V4_TAG_RNG_CAT_MAX 8
+
+/*
* Helper Functions
*/
@@ -1109,16 +1136,15 @@ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def,
unsigned char *net_cat,
u32 net_cat_len)
{
- /* The constant '16' is not random, it is the maximum number of
- * high/low category range pairs as permitted by the CIPSO draft based
- * on a maximum IPv4 header length of 60 bytes - the BUG_ON() assertion
- * does a sanity check to make sure we don't overflow the array. */
int iter = -1;
- u16 array[16];
+ u16 array[CIPSO_V4_TAG_RNG_CAT_MAX * 2];
u32 array_cnt = 0;
u32 cat_size = 0;
- BUG_ON(net_cat_len > 30);
+ /* make sure we don't overflow the 'array[]' variable */
+ if (net_cat_len >
+ (CIPSO_V4_OPT_LEN_MAX - CIPSO_V4_HDR_LEN - CIPSO_V4_TAG_RNG_BLEN))
+ return -ENOSPC;
for (;;) {
iter = netlbl_secattr_catmap_walk(secattr->mls_cat, iter + 1);
@@ -1174,7 +1200,7 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
u16 cat_low;
u16 cat_high;
- for(net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
+ for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
cat_high = ntohs(*((__be16 *)&net_cat[net_iter]));
if ((net_iter + 4) <= net_cat_len)
cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2]));
@@ -1196,9 +1222,6 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
* Protocol Handling Functions
*/
-#define CIPSO_V4_OPT_LEN_MAX 40
-#define CIPSO_V4_HDR_LEN 6
-
/**
* cipso_v4_gentag_hdr - Generate a CIPSO option header
* @doi_def: the DOI definition
@@ -1676,7 +1699,7 @@ validate_return:
*/
void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
- if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
if (gateway)
@@ -1933,6 +1956,11 @@ int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
&cipso_ptr[6],
secattr);
break;
+ case CIPSO_V4_TAG_RANGE:
+ ret_val = cipso_v4_parsetag_rng(doi_def,
+ &cipso_ptr[6],
+ secattr);
+ break;
}
skbuff_getattr_return:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e10794dc5f64..7f95e6e9beeb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -48,7 +48,6 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
@@ -62,7 +61,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
struct ipv4_devconf ipv4_devconf = {
.accept_redirects = 1,
@@ -502,8 +501,10 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
goto errout;
ifm = nlmsg_data(nlh);
- if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
+ if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
+ err = -EINVAL;
goto errout;
+ }
dev = __dev_get_by_index(ifm->ifa_index);
if (dev == NULL) {
@@ -631,7 +632,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
dev_load(ifr.ifr_name);
#endif
- switch(cmd) {
+ switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
case SIOCGIFBRDADDR: /* Get the broadcast address */
case SIOCGIFDSTADDR: /* Get the destination address */
@@ -706,7 +707,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
goto done;
- switch(cmd) {
+ switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
sin->sin_addr.s_addr = ifa->ifa_local;
goto rarok;
@@ -909,7 +910,7 @@ no_in_dev:
*/
read_lock(&dev_base_lock);
rcu_read_lock();
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
continue;
@@ -988,7 +989,7 @@ __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local,
read_lock(&dev_base_lock);
rcu_read_lock();
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if ((in_dev = __in_dev_get_rcu(dev))) {
addr = confirm_addr_indev(in_dev, dst, local, scope);
if (addr)
@@ -1181,34 +1182,29 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
int s_ip_idx, s_idx = cb->args[0];
s_ip_idx = ip_idx = cb->args[1];
- read_lock(&dev_base_lock);
- for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if (idx > s_idx)
s_ip_idx = 0;
- rcu_read_lock();
- if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
- rcu_read_unlock();
- continue;
- }
+ if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
+ goto cont;
for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
ifa = ifa->ifa_next, ip_idx++) {
if (ip_idx < s_ip_idx)
- continue;
+ continue; /* skip only this already-dumped address; "goto cont" here would abandon the device's remaining addresses */
if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
- RTM_NEWADDR, NLM_F_MULTI) <= 0) {
- rcu_read_unlock();
+ RTM_NEWADDR, NLM_F_MULTI) <= 0)
goto done;
- }
}
- rcu_read_unlock();
+cont:
+ idx++;
}
done:
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
cb->args[1] = ip_idx;
@@ -1239,19 +1235,6 @@ errout:
rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
}
-static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
- [RTM_NEWADDR - RTM_BASE] = { .doit = inet_rtm_newaddr, },
- [RTM_DELADDR - RTM_BASE] = { .doit = inet_rtm_deladdr, },
- [RTM_GETADDR - RTM_BASE] = { .dumpit = inet_dump_ifaddr, },
- [RTM_NEWROUTE - RTM_BASE] = { .doit = inet_rtm_newroute, },
- [RTM_DELROUTE - RTM_BASE] = { .doit = inet_rtm_delroute, },
- [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
- .dumpit = inet_dump_fib, },
-#ifdef CONFIG_IP_MULTIPLE_TABLES
- [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
-#endif
-};
-
#ifdef CONFIG_SYSCTL
void inet_forward_change(void)
@@ -1263,7 +1246,7 @@ void inet_forward_change(void)
ipv4_devconf_dflt.forwarding = on;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
struct in_device *in_dev;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
@@ -1634,7 +1617,10 @@ void __init devinet_init(void)
{
register_gifconf(PF_INET, inet_gifconf);
register_netdevice_notifier(&ip_netdev_notifier);
- rtnetlink_links[PF_INET] = inet_rtnetlink_table;
+
+ rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
+ rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
+ rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
#ifdef CONFIG_SYSCTL
devinet_sysctl.sysctl_header =
register_sysctl_table(devinet_sysctl.devinet_root_dir);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 31041127eeb8..47c95e8ef045 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -21,13 +21,14 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct blkcipher_desc desc;
struct esp_data *esp;
struct sk_buff *trailer;
+ u8 *tail;
int blksize;
int clen;
int alen;
int nfrags;
/* Strip IP+ESP header. */
- __skb_pull(skb, skb->h.raw - skb->data);
+ __skb_pull(skb, skb_transport_offset(skb));
/* Now skb is pure payload to encrypt */
err = -ENOMEM;
@@ -49,19 +50,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
goto error;
/* Fill padding... */
+ tail = skb_tail_pointer(trailer);
do {
int i;
for (i=0; i<clen-skb->len - 2; i++)
- *(u8*)(trailer->tail + i) = i+1;
+ tail[i] = i + 1;
} while (0);
- *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+ tail[clen - skb->len - 2] = (clen - skb->len) - 2;
pskb_put(skb, trailer, clen - skb->len);
- __skb_push(skb, skb->data - skb->nh.raw);
- top_iph = skb->nh.iph;
- esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4);
+ __skb_push(skb, skb->data - skb_network_header(skb));
+ top_iph = ip_hdr(skb);
+ esph = (struct ip_esp_hdr *)(skb_network_header(skb) +
+ top_iph->ihl * 4);
top_iph->tot_len = htons(skb->len + alen);
- *(u8*)(trailer->tail - 1) = top_iph->protocol;
+ *(skb_tail_pointer(trailer) - 1) = top_iph->protocol;
/* this is non-NULL only with UDP Encapsulation */
if (x->encap) {
@@ -217,12 +220,12 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
/* ... check padding bits here. Silly. :-) */
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
ihl = iph->ihl * 4;
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
- struct udphdr *uh = (void *)(skb->nh.raw + ihl);
+ struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
/*
* 1) if the NAT-T peer's IP or port changed then
@@ -260,7 +263,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
iph->protocol = nexthdr[1];
pskb_trim(skb, skb->len - alen - padlen - 2);
- skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl;
+ __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+ skb_set_transport_header(skb, -ihl);
return 0;
@@ -268,32 +272,33 @@ out:
return -EINVAL;
}
-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
- int enclen = 0;
+ u32 align = max_t(u32, blksize, esp->conf.padlen);
+ u32 rem;
+
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ rem = mtu & (align - 1);
+ mtu &= ~(align - 1);
switch (x->props.mode) {
case XFRM_MODE_TUNNEL:
- mtu = ALIGN(mtu +2, blksize);
break;
default:
case XFRM_MODE_TRANSPORT:
/* The worst case */
- mtu = ALIGN(mtu + 2, 4) + blksize - 4;
+ mtu -= blksize - 4;
+ mtu += min_t(u32, blksize - 4, rem);
break;
case XFRM_MODE_BEET:
/* The worst case. */
- enclen = IPV4_BEET_PHMAXLEN;
- mtu = ALIGN(mtu + enclen + 2, blksize);
+ mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem);
break;
}
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
-
- return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
+ return mtu - 2;
}
static void esp4_err(struct sk_buff *skb, u32 info)
@@ -302,8 +307,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
- if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
- skb->h.icmph->code != ICMP_FRAG_NEEDED)
+ if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
@@ -336,6 +341,7 @@ static int esp_init_state(struct xfrm_state *x)
{
struct esp_data *esp = NULL;
struct crypto_blkcipher *tfm;
+ u32 align;
/* null auth and encryption can have zero length keys */
if (x->aalg) {
@@ -402,6 +408,8 @@ static int esp_init_state(struct xfrm_state *x)
x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
+ else if (x->props.mode == XFRM_MODE_BEET)
+ x->props.header_len += IPV4_BEET_PHMAXLEN;
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
@@ -417,7 +425,10 @@ static int esp_init_state(struct xfrm_state *x)
}
}
x->data = esp;
- x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+ align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
+ if (esp->conf.padlen)
+ align = max_t(u32, align, esp->conf.padlen);
+ x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len;
return 0;
error:
@@ -434,7 +445,7 @@ static struct xfrm_type esp_type =
.proto = IPPROTO_ESP,
.init_state = esp_init_state,
.destructor = esp_destroy,
- .get_max_size = esp4_get_max_size,
+ .get_mtu = esp4_get_mtu,
.input = esp_input,
.output = esp_output
};
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1fba6439fc57..837f2957fa83 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -34,7 +34,6 @@
#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
-#include <linux/netlink.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -46,6 +45,7 @@
#include <net/icmp.h>
#include <net/arp.h>
#include <net/ip_fib.h>
+#include <net/rtnetlink.h>
#define FFprint(a...) printk(KERN_DEBUG a)
@@ -493,6 +493,11 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
cfg->fc_nlinfo.nlh = nlh;
+ if (cfg->fc_type > RTN_MAX) {
+ err = -EINVAL;
+ goto errout;
+ }
+
nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
switch (attr->nla_type) {
case RTA_DST:
@@ -535,7 +540,7 @@ errout:
return err;
}
-int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_config cfg;
struct fib_table *tb;
@@ -556,7 +561,7 @@ errout:
return err;
}
-int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_config cfg;
struct fib_table *tb;
@@ -577,7 +582,7 @@ errout:
return err;
}
-int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
unsigned int h, s_h;
unsigned int e = 0, s_e;
@@ -771,6 +776,12 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
.nl_u = { .ip4_u = { .daddr = frn->fl_addr,
.tos = frn->fl_tos,
.scope = frn->fl_scope } } };
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ res.r = NULL;
+#endif
+
+ frn->err = -ENOENT;
if (tb) {
local_bh_disable();
@@ -782,6 +793,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
frn->nh_sel = res.nh_sel;
frn->type = res.type;
frn->scope = res.scope;
+ fib_res_put(&res);
}
local_bh_enable();
}
@@ -796,7 +808,10 @@ static void nl_fib_input(struct sock *sk, int len)
struct fib_table *tb;
skb = skb_dequeue(&sk->sk_receive_queue);
- nlh = (struct nlmsghdr *)skb->data;
+ if (skb == NULL)
+ return;
+
+ nlh = nlmsg_hdr(skb);
if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
kfree_skb(skb);
@@ -808,7 +823,7 @@ static void nl_fib_input(struct sock *sk, int len)
nl_fib_lookup(frn, tb);
- pid = nlh->nlmsg_pid; /*pid of sending process */
+ pid = NETLINK_CB(skb).pid; /* pid of sending process */
NETLINK_CB(skb).pid = 0; /* from kernel */
NETLINK_CB(skb).dst_group = 0; /* unicast */
netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
@@ -816,7 +831,8 @@ static void nl_fib_input(struct sock *sk, int len)
static void nl_fib_lookup_init(void)
{
- netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
+ netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
+ THIS_MODULE);
}
static void fib_disable_ip(struct net_device *dev, int force)
@@ -914,6 +930,10 @@ void __init ip_fib_init(void)
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
nl_fib_lookup_init();
+
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
}
EXPORT_SYMBOL(inet_addr_type);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index a4949f957ab5..9cfecf1215c9 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -1027,7 +1027,7 @@ out:
return 0;
}
-static struct seq_operations fib_seq_ops = {
+static const struct seq_operations fib_seq_ops = {
.start = fib_seq_start,
.next = fib_seq_next,
.stop = fib_seq_stop,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b837c33e0404..33083ad52e9f 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -171,8 +171,6 @@ static struct fib_table *fib_empty_table(void)
static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
FRA_GENERIC_POLICY,
- [FRA_SRC] = { .type = NLA_U32 },
- [FRA_DST] = { .type = NLA_U32 },
[FRA_FLOW] = { .type = NLA_U32 },
};
@@ -183,8 +181,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
- if (frh->src_len > 32 || frh->dst_len > 32 ||
- (frh->tos & ~IPTOS_TOS_MASK))
+ if (frh->tos & ~IPTOS_TOS_MASK)
goto errout;
if (rule->table == RT_TABLE_UNSPEC) {
@@ -201,10 +198,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
}
- if (tb[FRA_SRC])
+ if (frh->src_len)
rule4->src = nla_get_be32(tb[FRA_SRC]);
- if (tb[FRA_DST])
+ if (frh->dst_len)
rule4->dst = nla_get_be32(tb[FRA_DST]);
#ifdef CONFIG_NET_CLS_ROUTE
@@ -242,10 +239,10 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
#endif
- if (tb[FRA_SRC] && (rule4->src != nla_get_be32(tb[FRA_SRC])))
+ if (frh->src_len && (rule4->src != nla_get_be32(tb[FRA_SRC])))
return 0;
- if (tb[FRA_DST] && (rule4->dst != nla_get_be32(tb[FRA_DST])))
+ if (frh->dst_len && (rule4->dst != nla_get_be32(tb[FRA_DST])))
return 0;
return 1;
@@ -277,11 +274,6 @@ nla_put_failure:
return -ENOBUFS;
}
-int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- return fib_rules_dump(skb, cb, AF_INET);
-}
-
static u32 fib4_rule_default_pref(void)
{
struct list_head *pos;
@@ -306,9 +298,15 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
+ nla_total_size(4); /* flow */
}
+static void fib4_rule_flush_cache(void)
+{
+ rt_cache_flush(-1);
+}
+
static struct fib_rules_ops fib4_rules_ops = {
.family = AF_INET,
.rule_size = sizeof(struct fib4_rule),
+ .addr_size = sizeof(u32),
.action = fib4_rule_action,
.match = fib4_rule_match,
.configure = fib4_rule_configure,
@@ -316,6 +314,7 @@ static struct fib_rules_ops fib4_rules_ops = {
.fill = fib4_rule_fill,
.default_pref = fib4_rule_default_pref,
.nlmsg_payload = fib4_rule_nlmsg_payload,
+ .flush_cache = fib4_rule_flush_cache,
.nlgroup = RTNLGRP_IPV4_RULE,
.policy = fib4_rule_policy,
.rules_list = &fib4_rules,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2f1fdae6efa6..406ea7050aed 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -89,7 +89,7 @@ static const struct
{
int error;
u8 scope;
-} fib_props[RTA_MAX + 1] = {
+} fib_props[RTN_MAX + 1] = {
{
.error = 0,
.scope = RT_SCOPE_NOWHERE,
@@ -927,7 +927,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
default:
printk(KERN_DEBUG "impossible 102\n");
return -EINVAL;
- };
+ }
}
return err;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 72b3036bbc09..9be7da7c3a8f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,7 +50,7 @@
* Patrick McHardy <kaber@trash.net>
*/
-#define VERSION "0.407"
+#define VERSION "0.408"
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -292,8 +292,8 @@ static inline void check_tnode(const struct tnode *tn)
static int halve_threshold = 25;
static int inflate_threshold = 50;
-static int halve_threshold_root = 15;
-static int inflate_threshold_root = 25;
+static int halve_threshold_root = 8;
+static int inflate_threshold_root = 15;
static void __alias_free_mem(struct rcu_head *head)
@@ -350,11 +350,10 @@ static void __tnode_free_rcu(struct rcu_head *head)
static inline void tnode_free(struct tnode *tn)
{
- if(IS_LEAF(tn)) {
+ if (IS_LEAF(tn)) {
struct leaf *l = (struct leaf *) tn;
call_rcu_bh(&l->rcu, __leaf_free_rcu);
- }
- else
+ } else
call_rcu(&tn->rcu, __tnode_free_rcu);
}
@@ -459,6 +458,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
struct tnode *old_tn;
int inflate_threshold_use;
int halve_threshold_use;
+ int max_resize;
if (!tn)
return NULL;
@@ -553,13 +553,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* Keep root node larger */
- if(!tn->parent)
+ if (!tn->parent)
inflate_threshold_use = inflate_threshold_root;
else
inflate_threshold_use = inflate_threshold;
err = 0;
- while ((tn->full_children > 0 &&
+ max_resize = 10;
+ while ((tn->full_children > 0 && max_resize-- &&
50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
inflate_threshold_use * tnode_child_length(tn))) {
@@ -574,6 +575,15 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
}
+ if (max_resize < 0) {
+ if (!tn->parent)
+ printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
+ inflate_threshold_root, tn->bits);
+ else
+ printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
+ inflate_threshold, tn->bits);
+ }
+
check_tnode(tn);
/*
@@ -584,13 +594,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* Keep root node larger */
- if(!tn->parent)
+ if (!tn->parent)
halve_threshold_use = halve_threshold_root;
else
halve_threshold_use = halve_threshold;
err = 0;
- while (tn->bits > 1 &&
+ max_resize = 10;
+ while (tn->bits > 1 && max_resize-- &&
100 * (tnode_child_length(tn) - tn->empty_children) <
halve_threshold_use * tnode_child_length(tn)) {
@@ -605,6 +616,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
}
+ if (max_resize < 0) {
+ if (!tn->parent)
+ printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
+ halve_threshold_root, tn->bits);
+ else
+ printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
+ halve_threshold, tn->bits);
+ }
/* Only one child remains */
if (tn->empty_children == tnode_child_length(tn) - 1)
@@ -1123,6 +1142,9 @@ err:
return fa_head;
}
+/*
+ * Caller must hold RTNL.
+ */
static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
{
struct trie *t = (struct trie *) tb->tb_data;
@@ -1527,7 +1549,6 @@ static int trie_leaf_remove(struct trie *t, t_key key)
t->revision++;
t->size--;
- preempt_disable();
tp = NODE_PARENT(n);
tnode_free((struct tnode *) n);
@@ -1537,11 +1558,13 @@ static int trie_leaf_remove(struct trie *t, t_key key)
rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
} else
rcu_assign_pointer(t->trie, NULL);
- preempt_enable();
return 1;
}
+/*
+ * Caller must hold RTNL.
+ */
static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
{
struct trie *t = (struct trie *) tb->tb_data;
@@ -1720,6 +1743,9 @@ up:
return NULL; /* Ready. Root of trie */
}
+/*
+ * Caller must hold RTNL.
+ */
static int fn_trie_flush(struct fib_table *tb)
{
struct trie *t = (struct trie *) tb->tb_data;
@@ -2032,12 +2058,12 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
{
struct node *n ;
- if(!t)
+ if (!t)
return NULL;
n = rcu_dereference(t->trie);
- if(!iter)
+ if (!iter)
return NULL;
if (n) {
@@ -2077,7 +2103,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
int i;
s->tnodes++;
- if(tn->bits < MAX_STAT_DEPTH)
+ if (tn->bits < MAX_STAT_DEPTH)
s->nodesizes[tn->bits]++;
for (i = 0; i < (1<<tn->bits); i++)
@@ -2243,7 +2269,7 @@ static inline const char *rtn_scope(enum rt_scope_t s)
{
static char buf[32];
- switch(s) {
+ switch (s) {
case RT_SCOPE_UNIVERSE: return "universe";
case RT_SCOPE_SITE: return "site";
case RT_SCOPE_LINK: return "link";
@@ -2333,7 +2359,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations fib_trie_seq_ops = {
+static const struct seq_operations fib_trie_seq_ops = {
.start = fib_trie_seq_start,
.next = fib_trie_seq_next,
.stop = fib_trie_seq_stop,
@@ -2454,7 +2480,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations fib_route_seq_ops = {
+static const struct seq_operations fib_route_seq_ops = {
.start = fib_trie_seq_start,
.next = fib_trie_seq_next,
.stop = fib_trie_seq_stop,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4b7a0d946a0d..d38cbba92a4d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -355,7 +355,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
ipc, rt, MSG_DONTWAIT) < 0)
ip_flush_pending_frames(icmp_socket->sk);
else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
- struct icmphdr *icmph = skb->h.icmph;
+ struct icmphdr *icmph = icmp_hdr(skb);
__wsum csum = 0;
struct sk_buff *skb1;
@@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
icmp_param->data.icmph.checksum = 0;
icmp_out_count(icmp_param->data.icmph.type);
- inet->tos = skb->nh.iph->tos;
+ inet->tos = ip_hdr(skb)->tos;
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
if (icmp_param->replyopts.optlen) {
@@ -404,7 +404,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = rt->rt_spec_dst,
- .tos = RT_TOS(skb->nh.iph->tos) } },
+ .tos = RT_TOS(ip_hdr(skb)->tos) } },
.proto = IPPROTO_ICMP };
security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(&rt, &fl))
@@ -448,9 +448,10 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
* Check this, icmp_send is called from the most obscure devices
* sometimes.
*/
- iph = skb_in->nh.iph;
+ iph = ip_hdr(skb_in);
- if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail)
+ if ((u8 *)iph < skb_in->head ||
+ (skb_in->network_header + sizeof(*iph)) > skb_in->tail)
goto out;
/*
@@ -484,7 +485,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
u8 _inner_type, *itp;
itp = skb_header_pointer(skb_in,
- skb_in->nh.raw +
+ skb_network_header(skb_in) +
(iph->ihl << 2) +
offsetof(struct icmphdr,
type) -
@@ -536,7 +537,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
icmp_param.data.icmph.un.gateway = info;
icmp_param.data.icmph.checksum = 0;
icmp_param.skb = skb_in;
- icmp_param.offset = skb_in->nh.raw - skb_in->data;
+ icmp_param.offset = skb_network_offset(skb_in);
icmp_out_count(icmp_param.data.icmph.type);
inet_sk(icmp_socket->sk)->tos = tos;
ipc.addr = iph->saddr;
@@ -613,7 +614,7 @@ static void icmp_unreach(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out_err;
- icmph = skb->h.icmph;
+ icmph = icmp_hdr(skb);
iph = (struct iphdr *)skb->data;
if (iph->ihl < 5) /* Mangled header, drop. */
@@ -676,7 +677,7 @@ static void icmp_unreach(struct sk_buff *skb)
printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
"type %u, code %u "
"error to a broadcast: %u.%u.%u.%u on %s\n",
- NIPQUAD(skb->nh.iph->saddr),
+ NIPQUAD(ip_hdr(skb)->saddr),
icmph->type, icmph->code,
NIPQUAD(iph->daddr),
skb->dev->name);
@@ -743,7 +744,7 @@ static void icmp_redirect(struct sk_buff *skb)
iph = (struct iphdr *)skb->data;
- switch (skb->h.icmph->code & 7) {
+ switch (icmp_hdr(skb)->code & 7) {
case ICMP_REDIR_NET:
case ICMP_REDIR_NETTOS:
/*
@@ -751,8 +752,8 @@ static void icmp_redirect(struct sk_buff *skb)
*/
case ICMP_REDIR_HOST:
case ICMP_REDIR_HOSTTOS:
- ip_rt_redirect(skb->nh.iph->saddr, iph->daddr,
- skb->h.icmph->un.gateway,
+ ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
+ icmp_hdr(skb)->un.gateway,
iph->saddr, skb->dev);
break;
}
@@ -780,7 +781,7 @@ static void icmp_echo(struct sk_buff *skb)
if (!sysctl_icmp_echo_ignore_all) {
struct icmp_bxm icmp_param;
- icmp_param.data.icmph = *skb->h.icmph;
+ icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
icmp_param.skb = skb;
icmp_param.offset = 0;
@@ -816,7 +817,7 @@ static void icmp_timestamp(struct sk_buff *skb)
icmp_param.data.times[2] = icmp_param.data.times[1];
if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
BUG();
- icmp_param.data.icmph = *skb->h.icmph;
+ icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
icmp_param.data.icmph.code = 0;
icmp_param.skb = skb;
@@ -943,7 +944,7 @@ int icmp_rcv(struct sk_buff *skb)
if (!pskb_pull(skb, sizeof(struct icmphdr)))
goto error;
- icmph = skb->h.icmph;
+ icmph = icmp_hdr(skb);
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1c6a084b5fb7..f4dd47453108 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -314,7 +314,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+ skb_reset_network_header(skb);
+ pip = ip_hdr(skb);
+ skb_put(skb, sizeof(struct iphdr) + 4);
pip->version = 4;
pip->ihl = (sizeof(struct iphdr)+4)>>2;
@@ -331,8 +333,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
((u8*)&pip[1])[2] = 0;
((u8*)&pip[1])[3] = 0;
- pig =(struct igmpv3_report *)skb_put(skb, sizeof(*pig));
- skb->h.igmph = (struct igmphdr *)pig;
+ skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4;
+ skb_put(skb, sizeof(*pig));
+ pig = igmpv3_report_hdr(skb);
pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT;
pig->resv1 = 0;
pig->csum = 0;
@@ -343,16 +346,14 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
static int igmpv3_sendpack(struct sk_buff *skb)
{
- struct iphdr *pip = skb->nh.iph;
- struct igmphdr *pig = skb->h.igmph;
- int iplen, igmplen;
+ struct iphdr *pip = ip_hdr(skb);
+ struct igmphdr *pig = igmp_hdr(skb);
+ const int iplen = skb->tail - skb->network_header;
+ const int igmplen = skb->tail - skb->transport_header;
- iplen = skb->tail - (unsigned char *)skb->nh.iph;
pip->tot_len = htons(iplen);
ip_send_check(pip);
-
- igmplen = skb->tail - (unsigned char *)skb->h.igmph;
- pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen);
+ pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
dst_output);
@@ -379,7 +380,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
pgr->grec_auxwords = 0;
pgr->grec_nsrcs = 0;
pgr->grec_mca = pmc->multiaddr;
- pih = (struct igmpv3_report *)skb->h.igmph;
+ pih = igmpv3_report_hdr(skb);
pih->ngrec = htons(ntohs(pih->ngrec)+1);
*ppgr = pgr;
return skb;
@@ -412,7 +413,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (!*psf_list)
goto empty_source;
- pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL;
+ pih = skb ? igmpv3_report_hdr(skb) : NULL;
/* EX and TO_EX get a fresh packet, if needed */
if (truncate) {
@@ -664,7 +665,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ skb_put(skb, sizeof(struct iphdr) + 4);
iph->version = 4;
iph->ihl = (sizeof(struct iphdr)+4)>>2;
@@ -827,8 +830,8 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
int len)
{
- struct igmphdr *ih = skb->h.igmph;
- struct igmpv3_query *ih3 = (struct igmpv3_query *)ih;
+ struct igmphdr *ih = igmp_hdr(skb);
+ struct igmpv3_query *ih3 = igmpv3_query_hdr(skb);
struct ip_mc_list *im;
__be32 group = ih->group;
int max_delay;
@@ -861,12 +864,12 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
return;
- ih3 = (struct igmpv3_query *) skb->h.raw;
+ ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs) {
if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)
+ ntohs(ih3->nsrcs)*sizeof(__be32)))
return;
- ih3 = (struct igmpv3_query *) skb->h.raw;
+ ih3 = igmpv3_query_hdr(skb);
}
max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE);
@@ -943,7 +946,7 @@ int igmp_rcv(struct sk_buff *skb)
goto drop;
}
- ih = skb->h.igmph;
+ ih = igmp_hdr(skb);
switch (ih->type) {
case IGMP_HOST_MEMBERSHIP_QUERY:
igmp_heard_query(in_dev, skb, len);
@@ -1255,9 +1258,9 @@ out:
*/
void ip_mc_rejoin_group(struct ip_mc_list *im)
{
+#ifdef CONFIG_IP_MULTICAST
struct in_device *in_dev = im->interface;
-#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
@@ -2285,9 +2288,8 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
struct ip_mc_list *im = NULL;
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
- for (state->dev = dev_base, state->in_dev = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->in_dev = NULL;
+ for_each_netdev(state->dev) {
struct in_device *in_dev;
in_dev = in_dev_get(state->dev);
if (!in_dev)
@@ -2313,7 +2315,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
read_unlock(&state->in_dev->mc_list_lock);
in_dev_put(state->in_dev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->in_dev = NULL;
break;
@@ -2397,7 +2399,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations igmp_mc_seq_ops = {
+static const struct seq_operations igmp_mc_seq_ops = {
.start = igmp_mc_seq_start,
.next = igmp_mc_seq_next,
.stop = igmp_mc_seq_stop,
@@ -2447,9 +2449,9 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
struct ip_mc_list *im = NULL;
struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
- for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->idev = NULL;
+ state->im = NULL;
+ for_each_netdev(state->dev) {
struct in_device *idev;
idev = in_dev_get(state->dev);
if (unlikely(idev == NULL))
@@ -2485,7 +2487,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
read_unlock(&state->idev->mc_list_lock);
in_dev_put(state->idev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->idev = NULL;
goto out;
@@ -2571,7 +2573,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations igmp_mcf_seq_ops = {
+static const struct seq_operations igmp_mcf_seq_ops = {
.start = igmp_mcf_seq_start,
.next = igmp_mcf_seq_next,
.stop = igmp_mcf_seq_stop,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5df71cd08da8..dbeacd8b0f90 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -27,6 +27,7 @@
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/inet6_hashtables.h>
+#include <net/netlink.h>
#include <linux/inet.h>
#include <linux/stddef.h>
@@ -60,7 +61,7 @@ static int inet_csk_diag_fill(struct sock *sk,
struct nlmsghdr *nlh;
void *info = NULL;
struct inet_diag_meminfo *minfo = NULL;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
const struct inet_diag_handler *handler;
handler = inet_diag_table[unlh->nlmsg_type];
@@ -147,12 +148,12 @@ static int inet_csk_diag_fill(struct sock *sk,
icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
icsk->icsk_ca_ops->get_info(sk, ext, skb);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -EMSGSIZE;
}
@@ -163,7 +164,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
{
long tmo;
struct inet_diag_msg *r;
- const unsigned char *previous_tail = skb->tail;
+ const unsigned char *previous_tail = skb_tail_pointer(skb);
struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
unlh->nlmsg_type, sizeof(*r));
@@ -205,10 +206,10 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
&tw6->tw_v6_daddr);
}
#endif
- nlh->nlmsg_len = skb->tail - previous_tail;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
return skb->len;
nlmsg_failure:
- skb_trim(skb, previous_tail - skb->data);
+ nlmsg_trim(skb, previous_tail);
return -EMSGSIZE;
}
@@ -535,7 +536,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct inet_sock *inet = inet_sk(sk);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
long tmo;
@@ -574,12 +575,12 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
&inet6_rsk(req)->rmt_addr);
}
#endif
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -805,68 +806,43 @@ done:
return skb->len;
}
-static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
- return 0;
+ int hdrlen = sizeof(struct inet_diag_req);
- if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX)
- goto err_inval;
+ if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
+ nlmsg_len(nlh) < hdrlen)
+ return -EINVAL;
if (inet_diag_table[nlh->nlmsg_type] == NULL)
return -ENOENT;
- if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len)
- goto err_inval;
-
- if (nlh->nlmsg_flags&NLM_F_DUMP) {
- if (nlh->nlmsg_len >
- (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) {
- struct rtattr *rta = (void *)(NLMSG_DATA(nlh) +
- sizeof(struct inet_diag_req));
- if (rta->rta_type != INET_DIAG_REQ_BYTECODE ||
- rta->rta_len < 8 ||
- rta->rta_len >
- (nlh->nlmsg_len -
- NLMSG_SPACE(sizeof(struct inet_diag_req))))
- goto err_inval;
- if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
- goto err_inval;
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (nlmsg_attrlen(nlh, hdrlen)) {
+ struct nlattr *attr;
+
+ attr = nlmsg_find_attr(nlh, hdrlen,
+ INET_DIAG_REQ_BYTECODE);
+ if (attr == NULL ||
+ nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
+ inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
+ return -EINVAL;
}
+
return netlink_dump_start(idiagnl, skb, nlh,
inet_diag_dump, NULL);
- } else
- return inet_diag_get_exact(skb, nlh);
-
-err_inval:
- return -EINVAL;
-}
-
-
-static inline void inet_diag_rcv_skb(struct sk_buff *skb)
-{
- if (skb->len >= NLMSG_SPACE(0)) {
- int err;
- struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
-
- if (nlh->nlmsg_len < sizeof(*nlh) ||
- skb->len < nlh->nlmsg_len)
- return;
- err = inet_diag_rcv_msg(skb, nlh);
- if (err || nlh->nlmsg_flags & NLM_F_ACK)
- netlink_ack(skb, nlh, err);
}
+
+ return inet_diag_get_exact(skb, nlh);
}
static void inet_diag_rcv(struct sock *sk, int len)
{
- struct sk_buff *skb;
- unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
+ unsigned int qlen = 0;
- while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
- inet_diag_rcv_skb(skb);
- kfree_skb(skb);
- }
+ do {
+ netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg);
+ } while (qlen);
}
static DEFINE_SPINLOCK(inet_diag_register_lock);
@@ -917,7 +893,7 @@ static int __init inet_diag_init(void)
goto out;
idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (idiagnl == NULL)
goto out_free_table;
err = 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index db3ef96bdfd9..2f44e6128068 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -87,10 +87,12 @@ static DEFINE_RWLOCK(peer_pool_lock);
static int peer_total;
/* Exported for sysctl_net_ipv4. */
-int inet_peer_threshold = 65536 + 128; /* start to throw entries more
+int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
* aggressively at this stage */
-int inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */
-int inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */
+int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
+int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
+int inet_peer_gc_mintime __read_mostly = 10 * HZ;
+int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
static struct inet_peer *inet_peer_unused_head;
static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
@@ -99,9 +101,6 @@ static DEFINE_SPINLOCK(inet_peer_unused_lock);
static void peer_check_expire(unsigned long dummy);
static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
-/* Exported for sysctl_net_ipv4. */
-int inet_peer_gc_mintime = 10 * HZ,
- inet_peer_gc_maxtime = 120 * HZ;
/* Called from ip_output.c:ip_init */
void __init inet_initpeers(void)
@@ -151,20 +150,27 @@ static void unlink_from_unused(struct inet_peer *p)
spin_unlock_bh(&inet_peer_unused_lock);
}
-/* Called with local BH disabled and the pool lock held. */
-#define lookup(daddr) \
+/*
+ * Called with local BH disabled and the pool lock held.
+ * _stack is known to be NULL or not at compile time,
+ * so compiler will optimize the if (_stack) tests.
+ */
+#define lookup(_daddr,_stack) \
({ \
struct inet_peer *u, **v; \
- stackptr = stack; \
- *stackptr++ = &peer_root; \
+ if (_stack) { \
+ stackptr = _stack; \
+ *stackptr++ = &peer_root; \
+ } \
for (u = peer_root; u != peer_avl_empty; ) { \
- if (daddr == u->v4daddr) \
+ if (_daddr == u->v4daddr) \
break; \
- if ((__force __u32)daddr < (__force __u32)u->v4daddr) \
+ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
v = &u->avl_left; \
else \
v = &u->avl_right; \
- *stackptr++ = v; \
+ if (_stack) \
+ *stackptr++ = v; \
u = *v; \
} \
u; \
@@ -288,7 +294,7 @@ static void unlink_from_pool(struct inet_peer *p)
if (atomic_read(&p->refcnt) == 1) {
struct inet_peer **stack[PEER_MAXDEPTH];
struct inet_peer ***stackptr, ***delp;
- if (lookup(p->v4daddr) != p)
+ if (lookup(p->v4daddr, stack) != p)
BUG();
delp = stackptr - 1; /* *delp[0] == p */
if (p->avl_left == peer_avl_empty) {
@@ -373,7 +379,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
/* Look up for the address quickly. */
read_lock_bh(&peer_pool_lock);
- p = lookup(daddr);
+ p = lookup(daddr, NULL);
if (p != peer_avl_empty)
atomic_inc(&p->refcnt);
read_unlock_bh(&peer_pool_lock);
@@ -400,7 +406,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
write_lock_bh(&peer_pool_lock);
/* Check if an entry has suddenly appeared. */
- p = lookup(daddr);
+ p = lookup(daddr, stack);
if (p != peer_avl_empty)
goto out_free;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 369e721c4bab..9cb04df0054b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,14 +67,14 @@ int ip_forward(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto drop;
- skb->ip_summed = CHECKSUM_NONE;
+ skb_forward_csum(skb);
/*
* According to the RFC, we must first decrease the TTL field. If
* that reaches zero, we must reply an ICMP control message telling
* that the packet's lifetime expired.
*/
- if (skb->nh.iph->ttl <= 1)
+ if (ip_hdr(skb)->ttl <= 1)
goto too_many_hops;
if (!xfrm4_route_forward(skb))
@@ -85,10 +85,18 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
goto sr_failed;
+ if (unlikely(skb->len > dst_mtu(&rt->u.dst) &&
+ (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+ IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(dst_mtu(&rt->u.dst)));
+ goto drop;
+ }
+
/* We are about to mangle packet. Copy it! */
if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
goto drop;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
/* Decrease ttl after skb cow done */
ip_decrease_ttl(iph);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b6f055380373..0231bdcb2ab7 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -92,7 +92,7 @@ struct ipq {
spinlock_t lock;
atomic_t refcnt;
struct timer_list timer; /* when will this queue expire? */
- struct timeval stamp;
+ ktime_t stamp;
int iif;
unsigned int rid;
struct inet_peer *peer;
@@ -184,7 +184,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
{
struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
- if(!qp)
+ if (!qp)
return NULL;
atomic_add(sizeof(struct ipq), &ip_frag_mem);
return qp;
@@ -321,11 +321,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
* promoted read lock to write lock.
*/
hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
- if(qp->id == qp_in->id &&
- qp->saddr == qp_in->saddr &&
- qp->daddr == qp_in->daddr &&
- qp->protocol == qp_in->protocol &&
- qp->user == qp_in->user) {
+ if (qp->id == qp_in->id &&
+ qp->saddr == qp_in->saddr &&
+ qp->daddr == qp_in->daddr &&
+ qp->protocol == qp_in->protocol &&
+ qp->user == qp_in->user) {
atomic_inc(&qp->refcnt);
write_unlock(&ipfrag_lock);
qp_in->last_in |= COMPLETE;
@@ -398,11 +398,11 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
read_lock(&ipfrag_lock);
hash = ipqhashfn(id, saddr, daddr, protocol);
hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
- if(qp->id == id &&
- qp->saddr == saddr &&
- qp->daddr == daddr &&
- qp->protocol == protocol &&
- qp->user == user) {
+ if (qp->id == id &&
+ qp->saddr == saddr &&
+ qp->daddr == daddr &&
+ qp->protocol == protocol &&
+ qp->user == user) {
atomic_inc(&qp->refcnt);
read_unlock(&ipfrag_lock);
return qp;
@@ -479,11 +479,11 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
goto err;
}
- offset = ntohs(skb->nh.iph->frag_off);
+ offset = ntohs(ip_hdr(skb)->frag_off);
flags = offset & ~IP_OFFSET;
offset &= IP_OFFSET;
offset <<= 3; /* offset is in 8-byte chunks */
- ihl = skb->nh.iph->ihl * 4;
+ ihl = ip_hdrlen(skb);
/* Determine the position of this fragment. */
end = offset + skb->len - ihl;
@@ -524,7 +524,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* this fragment, right?
*/
prev = NULL;
- for(next = qp->fragments; next != NULL; next = next->next) {
+ for (next = qp->fragments; next != NULL; next = next->next) {
if (FRAG_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
@@ -592,7 +592,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (skb->dev)
qp->iif = skb->dev->ifindex;
skb->dev = NULL;
- skb_get_timestamp(skb, &qp->stamp);
+ qp->stamp = skb->tstamp;
qp->meat += skb->len;
atomic_add(skb->truesize, &ip_frag_mem);
if (offset == 0)
@@ -624,10 +624,10 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
BUG_TRAP(FRAG_CB(head)->offset == 0);
/* Allocate a new buffer for the datagram. */
- ihlen = head->nh.iph->ihl*4;
+ ihlen = ip_hdrlen(head);
len = ihlen + qp->len;
- if(len > 65535)
+ if (len > 65535)
goto out_oversize;
/* Head of list must not be cloned. */
@@ -658,7 +658,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
}
skb_shinfo(head)->frag_list = head->next;
- skb_push(head, head->data - head->nh.raw);
+ skb_push(head, head->data - skb_network_header(head));
atomic_sub(head->truesize, &ip_frag_mem);
for (fp=head->next; fp; fp = fp->next) {
@@ -674,9 +674,9 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
head->next = NULL;
head->dev = dev;
- skb_set_timestamp(head, &qp->stamp);
+ head->tstamp = qp->stamp;
- iph = head->nh.iph;
+ iph = ip_hdr(head);
iph->frag_off = 0;
iph->tot_len = htons(len);
IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
@@ -700,7 +700,6 @@ out_fail:
/* Process an incoming IP datagram fragment. */
struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
{
- struct iphdr *iph = skb->nh.iph;
struct ipq *qp;
struct net_device *dev;
@@ -713,7 +712,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
dev = skb->dev;
/* Lookup (or create) queue header */
- if ((qp = ip_find(iph, user)) != NULL) {
+ if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
struct sk_buff *ret = NULL;
spin_lock(&qp->lock);
@@ -734,7 +733,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
return NULL;
}
-void ipfrag_init(void)
+void __init ipfrag_init(void)
{
ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9151da642318..63282934725e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -191,11 +191,11 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
return NULL;
}
-static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
{
- __be32 remote = t->parms.iph.daddr;
- __be32 local = t->parms.iph.saddr;
- __be32 key = t->parms.i_key;
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
+ __be32 key = parms->i_key;
unsigned h = HASH(key);
int prio = 0;
@@ -209,6 +209,11 @@ static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
return &tunnels[prio][h];
}
+static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+{
+ return __ipgre_bucket(&t->parms);
+}
+
static void ipgre_tunnel_link(struct ip_tunnel *t)
{
struct ip_tunnel **tp = ipgre_bucket(t);
@@ -240,17 +245,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
__be32 key = parms->i_key;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
- unsigned h = HASH(key);
- int prio = 0;
char name[IFNAMSIZ];
- if (local)
- prio |= 1;
- if (remote && !MULTICAST(remote)) {
- prio |= 2;
- h ^= HASH(remote);
- }
- for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
if (key == t->parms.i_key)
return t;
@@ -320,8 +317,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
struct iphdr *iph = (struct iphdr*)skb->data;
__be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
int grehlen = (iph->ihl<<2) + 4;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
__be16 flags;
@@ -388,8 +385,8 @@ out:
struct iphdr *iph = (struct iphdr*)dp;
struct iphdr *eiph;
__be16 *p = (__be16*)(dp+(iph->ihl<<2));
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
int rel_type = 0;
int rel_code = 0;
__be32 rel_info = 0;
@@ -422,7 +419,7 @@ out:
default:
return;
case ICMP_PARAMETERPROB:
- n = ntohl(skb->h.icmph->un.gateway) >> 24;
+ n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
if (n < (iph->ihl<<2))
return;
@@ -442,7 +439,7 @@ out:
return;
case ICMP_FRAG_NEEDED:
/* And it is the only really necessary thing :-) */
- n = ntohs(skb->h.icmph->un.frag.mtu);
+ n = ntohs(icmp_hdr(skb)->un.frag.mtu);
if (n < grehlen+68)
return;
n -= grehlen;
@@ -474,7 +471,7 @@ out:
dst_release(skb2->dst);
skb2->dst = NULL;
skb_pull(skb2, skb->data - (u8*)eiph);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
/* Try to guess incoming interface */
memset(&fl, 0, sizeof(fl));
@@ -533,9 +530,9 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
if (INET_ECN_is_ce(iph->tos)) {
if (skb->protocol == htons(ETH_P_IP)) {
- IP_ECN_set_ce(skb->nh.iph);
+ IP_ECN_set_ce(ip_hdr(skb));
} else if (skb->protocol == htons(ETH_P_IPV6)) {
- IP6_ECN_set_ce(skb->nh.ipv6h);
+ IP6_ECN_set_ce(ipv6_hdr(skb));
}
}
}
@@ -565,7 +562,7 @@ static int ipgre_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, 16))
goto drop_nolock;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
h = skb->data;
flags = *(__be16*)h;
@@ -616,9 +613,10 @@ static int ipgre_rcv(struct sk_buff *skb)
offset += 4;
}
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = __pskb_pull(skb, offset);
- skb_postpull_rcsum(skb, skb->h.raw, offset);
+ skb_reset_mac_header(skb);
+ __pskb_pull(skb, offset);
+ skb_reset_network_header(skb);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (MULTICAST(iph->daddr)) {
@@ -669,7 +667,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->stat;
- struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *tiph;
u8 tos;
__be16 df;
@@ -720,7 +718,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
addr_type = ipv6_addr_type(addr6);
if (addr_type == IPV6_ADDR_ANY) {
- addr6 = &skb->nh.ipv6h->daddr;
+ addr6 = &ipv6_hdr(skb)->daddr;
addr_type = ipv6_addr_type(addr6);
}
@@ -824,11 +822,12 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb_set_owner_w(new_skb, skb->sk);
dev_kfree_skb(skb);
skb = new_skb;
- old_iph = skb->nh.iph;
+ old_iph = ip_hdr(skb);
}
- skb->h.raw = skb->nh.raw;
- skb->nh.raw = skb_push(skb, gre_hlen);
+ skb->transport_header = skb->network_header;
+ skb_push(skb, gre_hlen);
+ skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED);
@@ -839,7 +838,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
* Push down and install the IPIP header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
iph->frag_off = df;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f38e97647ac0..97069399d864 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -158,7 +158,7 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
int ip_call_ra_chain(struct sk_buff *skb)
{
struct ip_ra_chain *ra;
- u8 protocol = skb->nh.iph->protocol;
+ u8 protocol = ip_hdr(skb)->protocol;
struct sock *last = NULL;
read_lock(&ip_ra_lock);
@@ -171,7 +171,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
if (sk && inet_sk(sk)->num == protocol &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
- if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
if (skb == NULL) {
read_unlock(&ip_ra_lock);
@@ -198,17 +198,15 @@ int ip_call_ra_chain(struct sk_buff *skb)
static inline int ip_local_deliver_finish(struct sk_buff *skb)
{
- int ihl = skb->nh.iph->ihl*4;
-
- __skb_pull(skb, ihl);
+ __skb_pull(skb, ip_hdrlen(skb));
/* Point into the IP datagram, just past the header. */
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
rcu_read_lock();
{
/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
- int protocol = skb->nh.iph->protocol;
+ int protocol = ip_hdr(skb)->protocol;
int hash;
struct sock *raw_sk;
struct net_protocol *ipprot;
@@ -220,7 +218,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
/* If there maybe a raw socket we must check - if not we
* don't care less
*/
- if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash))
+ if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
raw_sk = NULL;
if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
@@ -266,7 +264,7 @@ int ip_local_deliver(struct sk_buff *skb)
* Reassemble IP fragments.
*/
- if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
if (!skb)
return 0;
@@ -294,7 +292,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
goto drop;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (ip_options_compile(NULL, skb)) {
IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
@@ -330,7 +328,8 @@ drop:
static inline int ip_rcv_finish(struct sk_buff *skb)
{
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt;
/*
* Initialise the virtual path cache for the packet. It describes
@@ -342,6 +341,8 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
if (unlikely(err)) {
if (err == -EHOSTUNREACH)
IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ else if (err == -ENETUNREACH)
+ IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
goto drop;
}
}
@@ -360,6 +361,12 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
if (iph->ihl > 5 && ip_rcv_options(skb))
goto drop;
+ rt = (struct rtable*)skb->dst;
+ if (rt->rt_type == RTN_MULTICAST)
+ IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+ else if (rt->rt_type == RTN_BROADCAST)
+ IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS);
+
return dst_input(skb);
drop:
@@ -391,7 +398,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
/*
* RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
@@ -410,13 +417,16 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto inhdr_error;
len = ntohs(iph->tot_len);
- if (skb->len < len || len < (iph->ihl*4))
+ if (skb->len < len) {
+ IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
+ } else if (len < (iph->ihl*4))
goto inhdr_error;
/* Our transport medium may have padded the buffer out. Now we know it
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f906a80d5a87..251346828cb4 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -40,7 +40,7 @@
void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
__be32 daddr, struct rtable *rt, int is_frag)
{
- unsigned char * iph = skb->nh.raw;
+ unsigned char *iph = skb_network_header(skb);
memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
@@ -104,13 +104,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
return 0;
}
- sptr = skb->nh.raw;
+ sptr = skb_network_header(skb);
dptr = dopt->__data;
if (skb->dst)
daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
else
- daddr = skb->nh.iph->daddr;
+ daddr = ip_hdr(skb)->daddr;
if (sopt->rr) {
optlen = sptr[sopt->rr+1];
@@ -180,7 +180,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
/*
* RFC1812 requires to fix illegal source routes.
*/
- if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0)
+ if (memcmp(&ip_hdr(skb)->saddr,
+ &start[soffset + 3], 4) == 0)
doffset -= 4;
}
if (doffset > 3) {
@@ -217,7 +218,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
void ip_options_fragment(struct sk_buff * skb)
{
- unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr);
+ unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
struct ip_options * opt = &(IPCB(skb)->opt);
int l = opt->optlen;
int optlen;
@@ -264,12 +265,13 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
if (!opt) {
opt = &(IPCB(skb)->opt);
- iph = skb->nh.raw;
+ iph = skb_network_header(skb);
opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
optptr = iph + sizeof(struct iphdr);
opt->is_data = 0;
} else {
- optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]);
+ optptr = opt->is_data ? opt->__data :
+ (unsigned char *)&(ip_hdr(skb)[1]);
iph = optptr - sizeof(struct iphdr);
}
@@ -563,7 +565,7 @@ void ip_forward_options(struct sk_buff *skb)
struct ip_options * opt = &(IPCB(skb)->opt);
unsigned char * optptr;
struct rtable *rt = (struct rtable*)skb->dst;
- unsigned char *raw = skb->nh.raw;
+ unsigned char *raw = skb_network_header(skb);
if (opt->rr_needaddr) {
optptr = (unsigned char *)raw + opt->rr;
@@ -587,7 +589,7 @@ void ip_forward_options(struct sk_buff *skb)
if (srrptr + 3 <= srrspace) {
opt->is_changed = 1;
ip_rt_get_source(&optptr[srrptr-1], rt);
- skb->nh.iph->daddr = rt->rt_dst;
+ ip_hdr(skb)->daddr = rt->rt_dst;
optptr[2] = srrptr+4;
} else if (net_ratelimit())
printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
@@ -599,7 +601,7 @@ void ip_forward_options(struct sk_buff *skb)
}
if (opt->is_changed) {
opt->is_changed = 0;
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
}
}
@@ -608,8 +610,8 @@ int ip_options_rcv_srr(struct sk_buff *skb)
struct ip_options *opt = &(IPCB(skb)->opt);
int srrspace, srrptr;
__be32 nexthop;
- struct iphdr *iph = skb->nh.iph;
- unsigned char * optptr = skb->nh.raw + opt->srr;
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned char *optptr = skb_network_header(skb) + opt->srr;
struct rtable *rt = (struct rtable*)skb->dst;
struct rtable *rt2;
int err;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d096332f6c6d..d6427d918512 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -95,8 +95,8 @@ __inline__ void ip_send_check(struct iphdr *iph)
/* dev_loopback_xmit for use with netfilter. */
static int ip_dev_loopback_xmit(struct sk_buff *newskb)
{
- newskb->mac.raw = newskb->data;
- __skb_pull(newskb, newskb->nh.raw - newskb->data);
+ skb_reset_mac_header(newskb);
+ __skb_pull(newskb, skb_network_offset(newskb));
newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
BUG_TRAP(newskb->dst);
@@ -125,11 +125,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
struct iphdr *iph;
/* Build the IP header. */
- if (opt)
- iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
- else
- iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
-
+ skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
iph->tos = inet->tos;
@@ -143,7 +141,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->protocol = sk->sk_protocol;
iph->tot_len = htons(skb->len);
ip_select_ident(iph, &rt->u.dst, sk);
- skb->nh.iph = iph;
if (opt && opt->optlen) {
iph->ihl += opt->optlen>>2;
@@ -163,9 +160,15 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static inline int ip_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
+ struct rtable *rt = (struct rtable *)dst;
struct net_device *dev = dst->dev;
int hh_len = LL_RESERVED_SPACE(dev);
+ if (rt->rt_type == RTN_MULTICAST)
+ IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+ else if (rt->rt_type == RTN_BROADCAST)
+ IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
+
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
struct sk_buff *skb2;
@@ -192,6 +195,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
return -EINVAL;
}
+static inline int ip_skb_dst_mtu(struct sk_buff *skb)
+{
+ struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
+
+ return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
+ skb->dst->dev->mtu : dst_mtu(skb->dst);
+}
+
static inline int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -201,7 +212,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
#endif
- if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
+ if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
@@ -248,7 +259,7 @@ int ip_mc_output(struct sk_buff *skb)
/* Multicasts with ttl 0 must not go beyond the host */
- if (skb->nh.iph->ttl == 0) {
+ if (ip_hdr(skb)->ttl == 0) {
kfree_skb(skb);
return 0;
}
@@ -333,7 +344,9 @@ packet_routed:
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
- iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+ skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
iph->tot_len = htons(skb->len);
if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
@@ -344,7 +357,6 @@ packet_routed:
iph->protocol = sk->sk_protocol;
iph->saddr = rt->rt_src;
iph->daddr = rt->rt_dst;
- skb->nh.iph = iph;
/* Transport layer set skb->h.foo itself. */
if (opt && opt->optlen) {
@@ -386,21 +398,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif
-#ifdef CONFIG_NETFILTER
- /* Connection association is same as pre-frag packet */
- nf_conntrack_put(to->nfct);
- to->nfct = from->nfct;
- nf_conntrack_get(to->nfct);
- to->nfctinfo = from->nfctinfo;
+ nf_copy(to, from);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
to->ipvs_property = from->ipvs_property;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- nf_bridge_put(to->nf_bridge);
- to->nf_bridge = from->nf_bridge;
- nf_bridge_get(to->nf_bridge);
-#endif
-#endif
skb_copy_secmark(to, from);
}
@@ -430,12 +431,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
* Point into the IP datagram header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(dst_mtu(&rt->u.dst)));
+ htonl(ip_skb_dst_mtu(skb)));
kfree_skb(skb);
return -EMSGSIZE;
}
@@ -502,10 +503,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
* before previous one went down. */
if (frag) {
frag->ip_summed = CHECKSUM_NONE;
- frag->h.raw = frag->data;
- frag->nh.raw = __skb_push(frag, hlen);
- memcpy(frag->nh.raw, iph, hlen);
- iph = frag->nh.iph;
+ skb_reset_transport_header(frag);
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), iph, hlen);
+ iph = ip_hdr(frag);
iph->tot_len = htons(frag->len);
ip_copy_metadata(frag, skb);
if (offset == 0)
@@ -566,7 +568,7 @@ slow_path:
* Keep copying data until we run out.
*/
- while(left > 0) {
+ while (left > 0) {
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
@@ -593,8 +595,8 @@ slow_path:
ip_copy_metadata(skb2, skb);
skb_reserve(skb2, ll_rs);
skb_put(skb2, len + hlen);
- skb2->nh.raw = skb2->data;
- skb2->h.raw = skb2->data + hlen;
+ skb_reset_network_header(skb2);
+ skb2->transport_header = skb2->network_header + hlen;
/*
* Charge the memory for the fragment to any owner
@@ -608,19 +610,19 @@ slow_path:
* Copy the packet header into the new buffer.
*/
- memcpy(skb2->nh.raw, skb->data, hlen);
+ skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
/*
* Copy a block of the IP datagram.
*/
- if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
+ if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
BUG();
left -= len;
/*
* Fill in the new header fields.
*/
- iph = skb2->nh.iph;
+ iph = ip_hdr(skb2);
iph->frag_off = htons((offset >> 3));
/* ANK: dirty, but effective trick. Upgrade options only if
@@ -722,10 +724,10 @@ static inline int ip_ufo_append_data(struct sock *sk,
skb_put(skb,fragheaderlen + transhdrlen);
/* initialize network header pointer */
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* initialize protocol header pointer */
- skb->h.raw = skb->data + fragheaderlen;
+ skb->transport_header = skb->network_header + fragheaderlen;
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
@@ -799,7 +801,9 @@ int ip_append_data(struct sock *sk,
inet->cork.addr = ipc->addr;
}
dst_hold(&rt->u.dst);
- inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
+ inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
+ rt->u.dst.dev->mtu :
+ dst_mtu(rt->u.dst.path);
inet->cork.rt = rt;
inet->cork.length = 0;
sk->sk_sndmsg_page = NULL;
@@ -929,9 +933,10 @@ alloc_new_skb:
* Find where to start putting bytes.
*/
data = skb_put(skb, fraglen);
- skb->nh.raw = data + exthdrlen;
+ skb_set_network_header(skb, exthdrlen);
+ skb->transport_header = (skb->network_header +
+ fragheaderlen);
data += fragheaderlen;
- skb->h.raw = data + exthdrlen;
if (fraggap) {
skb->csum = skb_copy_and_csum_bits(
@@ -1100,8 +1105,6 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
}
if (len <= 0) {
struct sk_buff *skb_prev;
- char *data;
- struct iphdr *iph;
int alloclen;
skb_prev = skb;
@@ -1124,15 +1127,15 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fragheaderlen + fraggap);
- skb->nh.iph = iph = (struct iphdr *)data;
- data += fragheaderlen;
- skb->h.raw = data;
-
+ skb_put(skb, fragheaderlen + fraggap);
+ skb_reset_network_header(skb);
+ skb->transport_header = (skb->network_header +
+ fragheaderlen);
if (fraggap) {
- skb->csum = skb_copy_and_csum_bits(
- skb_prev, maxfraglen,
- data, fraggap, 0);
+ skb->csum = skb_copy_and_csum_bits(skb_prev,
+ maxfraglen,
+ skb_transport_header(skb),
+ fraggap, 0);
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
pskb_trim_unique(skb_prev, maxfraglen);
@@ -1198,10 +1201,10 @@ int ip_push_pending_frames(struct sock *sk)
tail_skb = &(skb_shinfo(skb)->frag_list);
/* move skb->data to ip header from ext header */
- if (skb->data < skb->nh.raw)
- __skb_pull(skb, skb->nh.raw - skb->data);
+ if (skb->data < skb_network_header(skb))
+ __skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
- __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+ __skb_pull(tmp_skb, skb_network_header_len(skb));
*tail_skb = tmp_skb;
tail_skb = &(tmp_skb->next);
skb->len += tmp_skb->len;
@@ -1216,13 +1219,13 @@ int ip_push_pending_frames(struct sock *sk)
* to fragment the frame generated here. No matter, what transforms
* how transforms change size of the packet, it will come out.
*/
- if (inet->pmtudisc != IP_PMTUDISC_DO)
+ if (inet->pmtudisc < IP_PMTUDISC_DO)
skb->local_df = 1;
/* DF bit is set when we want to see DF on outgoing frames.
* If local_df is set too, we still allow to fragment this frame
* locally. */
- if (inet->pmtudisc == IP_PMTUDISC_DO ||
+ if (inet->pmtudisc >= IP_PMTUDISC_DO ||
(skb->len <= dst_mtu(&rt->u.dst) &&
ip_dont_fragment(sk, &rt->u.dst)))
df = htons(IP_DF);
@@ -1352,11 +1355,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = rt->rt_spec_dst,
- .tos = RT_TOS(skb->nh.iph->tos) } },
+ .tos = RT_TOS(ip_hdr(skb)->tos) } },
/* Not quite clean, but right. */
.uli_u = { .ports =
- { .sport = skb->h.th->dest,
- .dport = skb->h.th->source } },
+ { .sport = tcp_hdr(skb)->dest,
+ .dport = tcp_hdr(skb)->source } },
.proto = sk->sk_protocol };
security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(&rt, &fl))
@@ -1370,14 +1373,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
with locally disabled BH and that sk cannot be already spinlocked.
*/
bh_lock_sock(sk);
- inet->tos = skb->nh.iph->tos;
+ inet->tos = ip_hdr(skb)->tos;
sk->sk_priority = skb->priority;
- sk->sk_protocol = skb->nh.iph->protocol;
+ sk->sk_protocol = ip_hdr(skb)->protocol;
ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
&ipc, rt, MSG_DONTWAIT);
if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
if (arg->csumoffset >= 0)
- *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+ *((__sum16 *)skb_transport_header(skb) +
+ arg->csumoffset) = csum_fold(csum_add(skb->csum,
+ arg->csum));
skb->ip_summed = CHECKSUM_NONE;
ip_push_pending_frames(sk);
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 23048d9f3584..4d544573f48a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -59,7 +59,7 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
struct in_pktinfo info;
struct rtable *rt = (struct rtable *)skb->dst;
- info.ipi_addr.s_addr = skb->nh.iph->daddr;
+ info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
if (rt) {
info.ipi_ifindex = rt->rt_iif;
info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
@@ -73,13 +73,13 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
{
- int ttl = skb->nh.iph->ttl;
+ int ttl = ip_hdr(skb)->ttl;
put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
}
static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
{
- put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos);
+ put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
}
static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
@@ -87,7 +87,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
if (IPCB(skb)->opt.optlen == 0)
return;
- put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, skb->nh.iph+1);
+ put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
+ ip_hdr(skb) + 1);
}
@@ -268,18 +269,21 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr = SKB_EXT_ERR(skb);
serr->ee.ee_errno = err;
serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
- serr->ee.ee_type = skb->h.icmph->type;
- serr->ee.ee_code = skb->h.icmph->code;
+ serr->ee.ee_type = icmp_hdr(skb)->type;
+ serr->ee.ee_code = icmp_hdr(skb)->code;
serr->ee.ee_pad = 0;
serr->ee.ee_info = info;
serr->ee.ee_data = 0;
- serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw;
+ serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
+ skb_network_header(skb);
serr->port = port;
- skb->h.raw = payload;
- if (!skb_pull(skb, payload - skb->data) ||
- sock_queue_err_skb(sk, skb))
- kfree_skb(skb);
+ if (skb_pull(skb, payload - skb->data) != NULL) {
+ skb_reset_transport_header(skb);
+ if (sock_queue_err_skb(sk, skb) == 0)
+ return;
+ }
+ kfree_skb(skb);
}
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
@@ -296,8 +300,9 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
if (!skb)
return;
- iph = (struct iphdr*)skb_put(skb, sizeof(struct iphdr));
- skb->nh.iph = iph;
+ skb_put(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
iph->daddr = daddr;
serr = SKB_EXT_ERR(skb);
@@ -308,11 +313,11 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
serr->ee.ee_pad = 0;
serr->ee.ee_info = info;
serr->ee.ee_data = 0;
- serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+ serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
serr->port = port;
- skb->h.raw = skb->tail;
- __skb_pull(skb, skb->tail - skb->data);
+ __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+ skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb))
kfree_skb(skb);
@@ -354,7 +359,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin = (struct sockaddr_in *)msg->msg_name;
if (sin) {
sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = *(__be32*)(skb->nh.raw + serr->addr_offset);
+ sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
+ serr->addr_offset);
sin->sin_port = serr->port;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
}
@@ -366,7 +372,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
struct inet_sock *inet = inet_sk(sk);
sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
if (inet->cmsg_flags)
@@ -403,20 +409,20 @@ out:
*/
static int do_ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int optlen)
+ int optname, char __user *optval, int optlen)
{
struct inet_sock *inet = inet_sk(sk);
int val=0,err;
if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
- (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
- (1<<IP_RETOPTS) | (1<<IP_TOS) |
- (1<<IP_TTL) | (1<<IP_HDRINCL) |
- (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
- (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
- (1<<IP_PASSSEC))) ||
- optname == IP_MULTICAST_TTL ||
- optname == IP_MULTICAST_LOOP) {
+ (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
+ (1<<IP_RETOPTS) | (1<<IP_TOS) |
+ (1<<IP_TTL) | (1<<IP_HDRINCL) |
+ (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
+ (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
+ (1<<IP_PASSSEC))) ||
+ optname == IP_MULTICAST_TTL ||
+ optname == IP_MULTICAST_LOOP) {
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
@@ -440,444 +446,444 @@ static int do_ip_setsockopt(struct sock *sk, int level,
lock_sock(sk);
switch (optname) {
- case IP_OPTIONS:
- {
- struct ip_options * opt = NULL;
- if (optlen > 40 || optlen < 0)
- goto e_inval;
- err = ip_options_get_from_user(&opt, optval, optlen);
- if (err)
- break;
- if (inet->is_icsk) {
- struct inet_connection_sock *icsk = inet_csk(sk);
+ case IP_OPTIONS:
+ {
+ struct ip_options * opt = NULL;
+ if (optlen > 40 || optlen < 0)
+ goto e_inval;
+ err = ip_options_get_from_user(&opt, optval, optlen);
+ if (err)
+ break;
+ if (inet->is_icsk) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- if (sk->sk_family == PF_INET ||
- (!((1 << sk->sk_state) &
- (TCPF_LISTEN | TCPF_CLOSE)) &&
- inet->daddr != LOOPBACK4_IPV6)) {
+ if (sk->sk_family == PF_INET ||
+ (!((1 << sk->sk_state) &
+ (TCPF_LISTEN | TCPF_CLOSE)) &&
+ inet->daddr != LOOPBACK4_IPV6)) {
#endif
- if (inet->opt)
- icsk->icsk_ext_hdr_len -= inet->opt->optlen;
- if (opt)
- icsk->icsk_ext_hdr_len += opt->optlen;
- icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ if (inet->opt)
+ icsk->icsk_ext_hdr_len -= inet->opt->optlen;
+ if (opt)
+ icsk->icsk_ext_hdr_len += opt->optlen;
+ icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- }
-#endif
}
- opt = xchg(&inet->opt, opt);
- kfree(opt);
- break;
+#endif
}
- case IP_PKTINFO:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PKTINFO;
- else
- inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
- break;
- case IP_RECVTTL:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TTL;
- else
- inet->cmsg_flags &= ~IP_CMSG_TTL;
- break;
- case IP_RECVTOS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TOS;
- else
- inet->cmsg_flags &= ~IP_CMSG_TOS;
- break;
- case IP_RECVOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RECVOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
- break;
- case IP_RETOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RETOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
+ opt = xchg(&inet->opt, opt);
+ kfree(opt);
+ break;
+ }
+ case IP_PKTINFO:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_PKTINFO;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
+ break;
+ case IP_RECVTTL:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_TTL;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_TTL;
+ break;
+ case IP_RECVTOS:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_TOS;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_TOS;
+ break;
+ case IP_RECVOPTS:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_RECVOPTS;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
+ break;
+ case IP_RETOPTS:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_RETOPTS;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
+ break;
+ case IP_PASSSEC:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_PASSSEC;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
+ break;
+ case IP_TOS: /* This sets both TOS and Precedence */
+ if (sk->sk_type == SOCK_STREAM) {
+ val &= ~3;
+ val |= inet->tos & 3;
+ }
+ if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
+ !capable(CAP_NET_ADMIN)) {
+ err = -EPERM;
break;
- case IP_PASSSEC:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PASSSEC;
- else
- inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
+ }
+ if (inet->tos != val) {
+ inet->tos = val;
+ sk->sk_priority = rt_tos2priority(val);
+ sk_dst_reset(sk);
+ }
+ break;
+ case IP_TTL:
+ if (optlen<1)
+ goto e_inval;
+ if (val != -1 && (val < 1 || val>255))
+ goto e_inval;
+ inet->uc_ttl = val;
+ break;
+ case IP_HDRINCL:
+ if (sk->sk_type != SOCK_RAW) {
+ err = -ENOPROTOOPT;
break;
- case IP_TOS: /* This sets both TOS and Precedence */
- if (sk->sk_type == SOCK_STREAM) {
- val &= ~3;
- val |= inet->tos & 3;
- }
- if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
- !capable(CAP_NET_ADMIN)) {
- err = -EPERM;
+ }
+ inet->hdrincl = val ? 1 : 0;
+ break;
+ case IP_MTU_DISCOVER:
+ if (val<0 || val>3)
+ goto e_inval;
+ inet->pmtudisc = val;
+ break;
+ case IP_RECVERR:
+ inet->recverr = !!val;
+ if (!val)
+ skb_queue_purge(&sk->sk_error_queue);
+ break;
+ case IP_MULTICAST_TTL:
+ if (sk->sk_type == SOCK_STREAM)
+ goto e_inval;
+ if (optlen<1)
+ goto e_inval;
+ if (val==-1)
+ val = 1;
+ if (val < 0 || val > 255)
+ goto e_inval;
+ inet->mc_ttl = val;
+ break;
+ case IP_MULTICAST_LOOP:
+ if (optlen<1)
+ goto e_inval;
+ inet->mc_loop = !!val;
+ break;
+ case IP_MULTICAST_IF:
+ {
+ struct ip_mreqn mreq;
+ struct net_device *dev = NULL;
+
+ if (sk->sk_type == SOCK_STREAM)
+ goto e_inval;
+ /*
+ * Check the arguments are allowable
+ */
+
+ err = -EFAULT;
+ if (optlen >= sizeof(struct ip_mreqn)) {
+ if (copy_from_user(&mreq,optval,sizeof(mreq)))
break;
- }
- if (inet->tos != val) {
- inet->tos = val;
- sk->sk_priority = rt_tos2priority(val);
- sk_dst_reset(sk);
- }
- break;
- case IP_TTL:
- if (optlen<1)
- goto e_inval;
- if (val != -1 && (val < 1 || val>255))
- goto e_inval;
- inet->uc_ttl = val;
- break;
- case IP_HDRINCL:
- if (sk->sk_type != SOCK_RAW) {
- err = -ENOPROTOOPT;
+ } else {
+ memset(&mreq, 0, sizeof(mreq));
+ if (optlen >= sizeof(struct in_addr) &&
+ copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
+ break;
+ }
+
+ if (!mreq.imr_ifindex) {
+ if (mreq.imr_address.s_addr == INADDR_ANY) {
+ inet->mc_index = 0;
+ inet->mc_addr = 0;
+ err = 0;
break;
}
- inet->hdrincl = val ? 1 : 0;
- break;
- case IP_MTU_DISCOVER:
- if (val<0 || val>2)
- goto e_inval;
- inet->pmtudisc = val;
- break;
- case IP_RECVERR:
- inet->recverr = !!val;
- if (!val)
- skb_queue_purge(&sk->sk_error_queue);
- break;
- case IP_MULTICAST_TTL:
- if (sk->sk_type == SOCK_STREAM)
- goto e_inval;
- if (optlen<1)
- goto e_inval;
- if (val==-1)
- val = 1;
- if (val < 0 || val > 255)
- goto e_inval;
- inet->mc_ttl = val;
- break;
- case IP_MULTICAST_LOOP:
- if (optlen<1)
- goto e_inval;
- inet->mc_loop = !!val;
- break;
- case IP_MULTICAST_IF:
- {
- struct ip_mreqn mreq;
- struct net_device *dev = NULL;
+ dev = ip_dev_find(mreq.imr_address.s_addr);
+ if (dev) {
+ mreq.imr_ifindex = dev->ifindex;
+ dev_put(dev);
+ }
+ } else
+ dev = __dev_get_by_index(mreq.imr_ifindex);
- if (sk->sk_type == SOCK_STREAM)
- goto e_inval;
- /*
- * Check the arguments are allowable
- */
- err = -EFAULT;
- if (optlen >= sizeof(struct ip_mreqn)) {
- if (copy_from_user(&mreq,optval,sizeof(mreq)))
- break;
- } else {
- memset(&mreq, 0, sizeof(mreq));
- if (optlen >= sizeof(struct in_addr) &&
- copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
- break;
- }
+ err = -EADDRNOTAVAIL;
+ if (!dev)
+ break;
- if (!mreq.imr_ifindex) {
- if (mreq.imr_address.s_addr == INADDR_ANY) {
- inet->mc_index = 0;
- inet->mc_addr = 0;
- err = 0;
- break;
- }
- dev = ip_dev_find(mreq.imr_address.s_addr);
- if (dev) {
- mreq.imr_ifindex = dev->ifindex;
- dev_put(dev);
- }
- } else
- dev = __dev_get_by_index(mreq.imr_ifindex);
+ err = -EINVAL;
+ if (sk->sk_bound_dev_if &&
+ mreq.imr_ifindex != sk->sk_bound_dev_if)
+ break;
+ inet->mc_index = mreq.imr_ifindex;
+ inet->mc_addr = mreq.imr_address.s_addr;
+ err = 0;
+ break;
+ }
- err = -EADDRNOTAVAIL;
- if (!dev)
- break;
+ case IP_ADD_MEMBERSHIP:
+ case IP_DROP_MEMBERSHIP:
+ {
+ struct ip_mreqn mreq;
- err = -EINVAL;
- if (sk->sk_bound_dev_if &&
- mreq.imr_ifindex != sk->sk_bound_dev_if)
+ if (optlen < sizeof(struct ip_mreq))
+ goto e_inval;
+ err = -EFAULT;
+ if (optlen >= sizeof(struct ip_mreqn)) {
+ if (copy_from_user(&mreq,optval,sizeof(mreq)))
break;
+ } else {
+ memset(&mreq, 0, sizeof(mreq));
+ if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
+ break;
+ }
- inet->mc_index = mreq.imr_ifindex;
- inet->mc_addr = mreq.imr_address.s_addr;
- err = 0;
+ if (optname == IP_ADD_MEMBERSHIP)
+ err = ip_mc_join_group(sk, &mreq);
+ else
+ err = ip_mc_leave_group(sk, &mreq);
+ break;
+ }
+ case IP_MSFILTER:
+ {
+ extern int sysctl_igmp_max_msf;
+ struct ip_msfilter *msf;
+
+ if (optlen < IP_MSFILTER_SIZE(0))
+ goto e_inval;
+ if (optlen > sysctl_optmem_max) {
+ err = -ENOBUFS;
break;
}
+ msf = kmalloc(optlen, GFP_KERNEL);
+ if (msf == 0) {
+ err = -ENOBUFS;
+ break;
+ }
+ err = -EFAULT;
+ if (copy_from_user(msf, optval, optlen)) {
+ kfree(msf);
+ break;
+ }
+ /* numsrc >= (1G-4) overflow in 32 bits */
+ if (msf->imsf_numsrc >= 0x3ffffffcU ||
+ msf->imsf_numsrc > sysctl_igmp_max_msf) {
+ kfree(msf);
+ err = -ENOBUFS;
+ break;
+ }
+ if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
+ kfree(msf);
+ err = -EINVAL;
+ break;
+ }
+ err = ip_mc_msfilter(sk, msf, 0);
+ kfree(msf);
+ break;
+ }
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ {
+ struct ip_mreq_source mreqs;
+ int omode, add;
- case IP_ADD_MEMBERSHIP:
- case IP_DROP_MEMBERSHIP:
- {
- struct ip_mreqn mreq;
-
- if (optlen < sizeof(struct ip_mreq))
- goto e_inval;
+ if (optlen != sizeof(struct ip_mreq_source))
+ goto e_inval;
+ if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
err = -EFAULT;
- if (optlen >= sizeof(struct ip_mreqn)) {
- if(copy_from_user(&mreq,optval,sizeof(mreq)))
- break;
- } else {
- memset(&mreq, 0, sizeof(mreq));
- if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
- break;
- }
-
- if (optname == IP_ADD_MEMBERSHIP)
- err = ip_mc_join_group(sk, &mreq);
- else
- err = ip_mc_leave_group(sk, &mreq);
break;
}
- case IP_MSFILTER:
- {
- extern int sysctl_igmp_max_msf;
- struct ip_msfilter *msf;
+ if (optname == IP_BLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 1;
+ } else if (optname == IP_UNBLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 0;
+ } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
+ struct ip_mreqn mreq;
- if (optlen < IP_MSFILTER_SIZE(0))
- goto e_inval;
- if (optlen > sysctl_optmem_max) {
- err = -ENOBUFS;
- break;
- }
- msf = kmalloc(optlen, GFP_KERNEL);
- if (msf == 0) {
- err = -ENOBUFS;
+ mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
+ mreq.imr_address.s_addr = mreqs.imr_interface;
+ mreq.imr_ifindex = 0;
+ err = ip_mc_join_group(sk, &mreq);
+ if (err && err != -EADDRINUSE)
break;
- }
+ omode = MCAST_INCLUDE;
+ add = 1;
+ } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
+ omode = MCAST_INCLUDE;
+ add = 0;
+ }
+ err = ip_mc_source(add, omode, sk, &mreqs, 0);
+ break;
+ }
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ {
+ struct group_req greq;
+ struct sockaddr_in *psin;
+ struct ip_mreqn mreq;
+
+ if (optlen < sizeof(struct group_req))
+ goto e_inval;
+ err = -EFAULT;
+ if (copy_from_user(&greq, optval, sizeof(greq)))
+ break;
+ psin = (struct sockaddr_in *)&greq.gr_group;
+ if (psin->sin_family != AF_INET)
+ goto e_inval;
+ memset(&mreq, 0, sizeof(mreq));
+ mreq.imr_multiaddr = psin->sin_addr;
+ mreq.imr_ifindex = greq.gr_interface;
+
+ if (optname == MCAST_JOIN_GROUP)
+ err = ip_mc_join_group(sk, &mreq);
+ else
+ err = ip_mc_leave_group(sk, &mreq);
+ break;
+ }
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ {
+ struct group_source_req greqs;
+ struct ip_mreq_source mreqs;
+ struct sockaddr_in *psin;
+ int omode, add;
+
+ if (optlen != sizeof(struct group_source_req))
+ goto e_inval;
+ if (copy_from_user(&greqs, optval, sizeof(greqs))) {
err = -EFAULT;
- if (copy_from_user(msf, optval, optlen)) {
- kfree(msf);
- break;
- }
- /* numsrc >= (1G-4) overflow in 32 bits */
- if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > sysctl_igmp_max_msf) {
- kfree(msf);
- err = -ENOBUFS;
- break;
- }
- if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
- kfree(msf);
- err = -EINVAL;
- break;
- }
- err = ip_mc_msfilter(sk, msf, 0);
- kfree(msf);
break;
}
- case IP_BLOCK_SOURCE:
- case IP_UNBLOCK_SOURCE:
- case IP_ADD_SOURCE_MEMBERSHIP:
- case IP_DROP_SOURCE_MEMBERSHIP:
- {
- struct ip_mreq_source mreqs;
- int omode, add;
-
- if (optlen != sizeof(struct ip_mreq_source))
- goto e_inval;
- if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
- err = -EFAULT;
- break;
- }
- if (optname == IP_BLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 1;
- } else if (optname == IP_UNBLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 0;
- } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
- struct ip_mreqn mreq;
-
- mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
- mreq.imr_address.s_addr = mreqs.imr_interface;
- mreq.imr_ifindex = 0;
- err = ip_mc_join_group(sk, &mreq);
- if (err && err != -EADDRINUSE)
- break;
- omode = MCAST_INCLUDE;
- add = 1;
- } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
- omode = MCAST_INCLUDE;
- add = 0;
- }
- err = ip_mc_source(add, omode, sk, &mreqs, 0);
+ if (greqs.gsr_group.ss_family != AF_INET ||
+ greqs.gsr_source.ss_family != AF_INET) {
+ err = -EADDRNOTAVAIL;
break;
}
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- {
- struct group_req greq;
- struct sockaddr_in *psin;
+ psin = (struct sockaddr_in *)&greqs.gsr_group;
+ mreqs.imr_multiaddr = psin->sin_addr.s_addr;
+ psin = (struct sockaddr_in *)&greqs.gsr_source;
+ mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
+ mreqs.imr_interface = 0; /* use index for mc_source */
+
+ if (optname == MCAST_BLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 1;
+ } else if (optname == MCAST_UNBLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 0;
+ } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
struct ip_mreqn mreq;
- if (optlen < sizeof(struct group_req))
- goto e_inval;
- err = -EFAULT;
- if(copy_from_user(&greq, optval, sizeof(greq)))
- break;
- psin = (struct sockaddr_in *)&greq.gr_group;
- if (psin->sin_family != AF_INET)
- goto e_inval;
- memset(&mreq, 0, sizeof(mreq));
+ psin = (struct sockaddr_in *)&greqs.gsr_group;
mreq.imr_multiaddr = psin->sin_addr;
- mreq.imr_ifindex = greq.gr_interface;
-
- if (optname == MCAST_JOIN_GROUP)
- err = ip_mc_join_group(sk, &mreq);
- else
- err = ip_mc_leave_group(sk, &mreq);
+ mreq.imr_address.s_addr = 0;
+ mreq.imr_ifindex = greqs.gsr_interface;
+ err = ip_mc_join_group(sk, &mreq);
+ if (err && err != -EADDRINUSE)
+ break;
+ greqs.gsr_interface = mreq.imr_ifindex;
+ omode = MCAST_INCLUDE;
+ add = 1;
+ } else /* MCAST_LEAVE_SOURCE_GROUP */ {
+ omode = MCAST_INCLUDE;
+ add = 0;
+ }
+ err = ip_mc_source(add, omode, sk, &mreqs,
+ greqs.gsr_interface);
+ break;
+ }
+ case MCAST_MSFILTER:
+ {
+ extern int sysctl_igmp_max_msf;
+ struct sockaddr_in *psin;
+ struct ip_msfilter *msf = NULL;
+ struct group_filter *gsf = NULL;
+ int msize, i, ifindex;
+
+ if (optlen < GROUP_FILTER_SIZE(0))
+ goto e_inval;
+ if (optlen > sysctl_optmem_max) {
+ err = -ENOBUFS;
break;
}
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- {
- struct group_source_req greqs;
- struct ip_mreq_source mreqs;
- struct sockaddr_in *psin;
- int omode, add;
-
- if (optlen != sizeof(struct group_source_req))
- goto e_inval;
- if (copy_from_user(&greqs, optval, sizeof(greqs))) {
- err = -EFAULT;
- break;
- }
- if (greqs.gsr_group.ss_family != AF_INET ||
- greqs.gsr_source.ss_family != AF_INET) {
- err = -EADDRNOTAVAIL;
- break;
- }
- psin = (struct sockaddr_in *)&greqs.gsr_group;
- mreqs.imr_multiaddr = psin->sin_addr.s_addr;
- psin = (struct sockaddr_in *)&greqs.gsr_source;
- mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
- mreqs.imr_interface = 0; /* use index for mc_source */
-
- if (optname == MCAST_BLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 1;
- } else if (optname == MCAST_UNBLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 0;
- } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
- struct ip_mreqn mreq;
-
- psin = (struct sockaddr_in *)&greqs.gsr_group;
- mreq.imr_multiaddr = psin->sin_addr;
- mreq.imr_address.s_addr = 0;
- mreq.imr_ifindex = greqs.gsr_interface;
- err = ip_mc_join_group(sk, &mreq);
- if (err && err != -EADDRINUSE)
- break;
- greqs.gsr_interface = mreq.imr_ifindex;
- omode = MCAST_INCLUDE;
- add = 1;
- } else /* MCAST_LEAVE_SOURCE_GROUP */ {
- omode = MCAST_INCLUDE;
- add = 0;
- }
- err = ip_mc_source(add, omode, sk, &mreqs,
- greqs.gsr_interface);
+ gsf = kmalloc(optlen,GFP_KERNEL);
+ if (gsf == 0) {
+ err = -ENOBUFS;
break;
}
- case MCAST_MSFILTER:
- {
- extern int sysctl_igmp_max_msf;
- struct sockaddr_in *psin;
- struct ip_msfilter *msf = NULL;
- struct group_filter *gsf = NULL;
- int msize, i, ifindex;
-
- if (optlen < GROUP_FILTER_SIZE(0))
- goto e_inval;
- if (optlen > sysctl_optmem_max) {
- err = -ENOBUFS;
- break;
- }
- gsf = kmalloc(optlen,GFP_KERNEL);
- if (gsf == 0) {
- err = -ENOBUFS;
- break;
- }
- err = -EFAULT;
- if (copy_from_user(gsf, optval, optlen)) {
- goto mc_msf_out;
- }
- /* numsrc >= (4G-140)/128 overflow in 32 bits */
- if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sysctl_igmp_max_msf) {
- err = -ENOBUFS;
- goto mc_msf_out;
- }
- if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
- err = -EINVAL;
- goto mc_msf_out;
- }
- msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
- msf = kmalloc(msize,GFP_KERNEL);
- if (msf == 0) {
- err = -ENOBUFS;
- goto mc_msf_out;
- }
- ifindex = gsf->gf_interface;
- psin = (struct sockaddr_in *)&gsf->gf_group;
- if (psin->sin_family != AF_INET) {
- err = -EADDRNOTAVAIL;
- goto mc_msf_out;
- }
- msf->imsf_multiaddr = psin->sin_addr.s_addr;
- msf->imsf_interface = 0;
- msf->imsf_fmode = gsf->gf_fmode;
- msf->imsf_numsrc = gsf->gf_numsrc;
+ err = -EFAULT;
+ if (copy_from_user(gsf, optval, optlen)) {
+ goto mc_msf_out;
+ }
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ if (gsf->gf_numsrc >= 0x1ffffff ||
+ gsf->gf_numsrc > sysctl_igmp_max_msf) {
+ err = -ENOBUFS;
+ goto mc_msf_out;
+ }
+ if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
+ err = -EINVAL;
+ goto mc_msf_out;
+ }
+ msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
+ msf = kmalloc(msize,GFP_KERNEL);
+ if (msf == 0) {
+ err = -ENOBUFS;
+ goto mc_msf_out;
+ }
+ ifindex = gsf->gf_interface;
+ psin = (struct sockaddr_in *)&gsf->gf_group;
+ if (psin->sin_family != AF_INET) {
err = -EADDRNOTAVAIL;
- for (i=0; i<gsf->gf_numsrc; ++i) {
- psin = (struct sockaddr_in *)&gsf->gf_slist[i];
-
- if (psin->sin_family != AF_INET)
- goto mc_msf_out;
- msf->imsf_slist[i] = psin->sin_addr.s_addr;
- }
- kfree(gsf);
- gsf = NULL;
-
- err = ip_mc_msfilter(sk, msf, ifindex);
-mc_msf_out:
- kfree(msf);
- kfree(gsf);
- break;
+ goto mc_msf_out;
}
- case IP_ROUTER_ALERT:
- err = ip_ra_control(sk, val ? 1 : 0, NULL);
- break;
-
- case IP_FREEBIND:
- if (optlen<1)
- goto e_inval;
- inet->freebind = !!val;
- break;
+ msf->imsf_multiaddr = psin->sin_addr.s_addr;
+ msf->imsf_interface = 0;
+ msf->imsf_fmode = gsf->gf_fmode;
+ msf->imsf_numsrc = gsf->gf_numsrc;
+ err = -EADDRNOTAVAIL;
+ for (i=0; i<gsf->gf_numsrc; ++i) {
+ psin = (struct sockaddr_in *)&gsf->gf_slist[i];
- case IP_IPSEC_POLICY:
- case IP_XFRM_POLICY:
- err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
- break;
- err = xfrm_user_policy(sk, optname, optval, optlen);
+ if (psin->sin_family != AF_INET)
+ goto mc_msf_out;
+ msf->imsf_slist[i] = psin->sin_addr.s_addr;
+ }
+ kfree(gsf);
+ gsf = NULL;
+
+ err = ip_mc_msfilter(sk, msf, ifindex);
+ mc_msf_out:
+ kfree(msf);
+ kfree(gsf);
+ break;
+ }
+ case IP_ROUTER_ALERT:
+ err = ip_ra_control(sk, val ? 1 : 0, NULL);
+ break;
+
+ case IP_FREEBIND:
+ if (optlen<1)
+ goto e_inval;
+ inet->freebind = !!val;
+ break;
+
+ case IP_IPSEC_POLICY:
+ case IP_XFRM_POLICY:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
break;
+ err = xfrm_user_policy(sk, optname, optval, optlen);
+ break;
- default:
- err = -ENOPROTOOPT;
- break;
+ default:
+ err = -ENOPROTOOPT;
+ break;
}
release_sock(sk);
return err;
@@ -948,214 +954,213 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
*/
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ char __user *optval, int __user *optlen)
{
struct inet_sock *inet = inet_sk(sk);
int val;
int len;
- if(level!=SOL_IP)
+ if (level != SOL_IP)
return -EOPNOTSUPP;
#ifdef CONFIG_IP_MROUTE
- if(optname>=MRT_BASE && optname <=MRT_BASE+10)
- {
+ if (optname >= MRT_BASE && optname <= MRT_BASE+10) {
return ip_mroute_getsockopt(sk,optname,optval,optlen);
}
#endif
- if(get_user(len,optlen))
+ if (get_user(len,optlen))
return -EFAULT;
- if(len < 0)
+ if (len < 0)
return -EINVAL;
lock_sock(sk);
- switch(optname) {
- case IP_OPTIONS:
- {
- unsigned char optbuf[sizeof(struct ip_options)+40];
- struct ip_options * opt = (struct ip_options*)optbuf;
- opt->optlen = 0;
- if (inet->opt)
- memcpy(optbuf, inet->opt,
- sizeof(struct ip_options)+
- inet->opt->optlen);
- release_sock(sk);
-
- if (opt->optlen == 0)
- return put_user(0, optlen);
-
- ip_options_undo(opt);
-
- len = min_t(unsigned int, len, opt->optlen);
- if(put_user(len, optlen))
- return -EFAULT;
- if(copy_to_user(optval, opt->__data, len))
- return -EFAULT;
- return 0;
- }
- case IP_PKTINFO:
- val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
- break;
- case IP_RECVTTL:
- val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
- break;
- case IP_RECVTOS:
- val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
- break;
- case IP_RECVOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
- break;
- case IP_RETOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
- break;
- case IP_PASSSEC:
- val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
- break;
- case IP_TOS:
- val = inet->tos;
- break;
- case IP_TTL:
- val = (inet->uc_ttl == -1 ?
- sysctl_ip_default_ttl :
- inet->uc_ttl);
- break;
- case IP_HDRINCL:
- val = inet->hdrincl;
- break;
- case IP_MTU_DISCOVER:
- val = inet->pmtudisc;
- break;
- case IP_MTU:
- {
- struct dst_entry *dst;
- val = 0;
- dst = sk_dst_get(sk);
- if (dst) {
- val = dst_mtu(dst);
- dst_release(dst);
- }
- if (!val) {
- release_sock(sk);
- return -ENOTCONN;
- }
- break;
+ switch (optname) {
+ case IP_OPTIONS:
+ {
+ unsigned char optbuf[sizeof(struct ip_options)+40];
+ struct ip_options * opt = (struct ip_options*)optbuf;
+ opt->optlen = 0;
+ if (inet->opt)
+ memcpy(optbuf, inet->opt,
+ sizeof(struct ip_options)+
+ inet->opt->optlen);
+ release_sock(sk);
+
+ if (opt->optlen == 0)
+ return put_user(0, optlen);
+
+ ip_options_undo(opt);
+
+ len = min_t(unsigned int, len, opt->optlen);
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, opt->__data, len))
+ return -EFAULT;
+ return 0;
+ }
+ case IP_PKTINFO:
+ val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
+ break;
+ case IP_RECVTTL:
+ val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
+ break;
+ case IP_RECVTOS:
+ val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
+ break;
+ case IP_RECVOPTS:
+ val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
+ break;
+ case IP_RETOPTS:
+ val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
+ break;
+ case IP_PASSSEC:
+ val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
+ break;
+ case IP_TOS:
+ val = inet->tos;
+ break;
+ case IP_TTL:
+ val = (inet->uc_ttl == -1 ?
+ sysctl_ip_default_ttl :
+ inet->uc_ttl);
+ break;
+ case IP_HDRINCL:
+ val = inet->hdrincl;
+ break;
+ case IP_MTU_DISCOVER:
+ val = inet->pmtudisc;
+ break;
+ case IP_MTU:
+ {
+ struct dst_entry *dst;
+ val = 0;
+ dst = sk_dst_get(sk);
+ if (dst) {
+ val = dst_mtu(dst);
+ dst_release(dst);
}
- case IP_RECVERR:
- val = inet->recverr;
- break;
- case IP_MULTICAST_TTL:
- val = inet->mc_ttl;
- break;
- case IP_MULTICAST_LOOP:
- val = inet->mc_loop;
- break;
- case IP_MULTICAST_IF:
- {
- struct in_addr addr;
- len = min_t(unsigned int, len, sizeof(struct in_addr));
- addr.s_addr = inet->mc_addr;
+ if (!val) {
release_sock(sk);
-
- if(put_user(len, optlen))
- return -EFAULT;
- if(copy_to_user(optval, &addr, len))
- return -EFAULT;
- return 0;
+ return -ENOTCONN;
}
- case IP_MSFILTER:
- {
- struct ip_msfilter msf;
- int err;
+ break;
+ }
+ case IP_RECVERR:
+ val = inet->recverr;
+ break;
+ case IP_MULTICAST_TTL:
+ val = inet->mc_ttl;
+ break;
+ case IP_MULTICAST_LOOP:
+ val = inet->mc_loop;
+ break;
+ case IP_MULTICAST_IF:
+ {
+ struct in_addr addr;
+ len = min_t(unsigned int, len, sizeof(struct in_addr));
+ addr.s_addr = inet->mc_addr;
+ release_sock(sk);
- if (len < IP_MSFILTER_SIZE(0)) {
- release_sock(sk);
- return -EINVAL;
- }
- if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
- release_sock(sk);
- return -EFAULT;
- }
- err = ip_mc_msfget(sk, &msf,
- (struct ip_msfilter __user *)optval, optlen);
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &addr, len))
+ return -EFAULT;
+ return 0;
+ }
+ case IP_MSFILTER:
+ {
+ struct ip_msfilter msf;
+ int err;
+
+ if (len < IP_MSFILTER_SIZE(0)) {
release_sock(sk);
- return err;
+ return -EINVAL;
}
- case MCAST_MSFILTER:
- {
- struct group_filter gsf;
- int err;
-
- if (len < GROUP_FILTER_SIZE(0)) {
- release_sock(sk);
- return -EINVAL;
- }
- if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
- release_sock(sk);
- return -EFAULT;
- }
- err = ip_mc_gsfget(sk, &gsf,
- (struct group_filter __user *)optval, optlen);
+ if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
release_sock(sk);
- return err;
+ return -EFAULT;
}
- case IP_PKTOPTIONS:
- {
- struct msghdr msg;
+ err = ip_mc_msfget(sk, &msf,
+ (struct ip_msfilter __user *)optval, optlen);
+ release_sock(sk);
+ return err;
+ }
+ case MCAST_MSFILTER:
+ {
+ struct group_filter gsf;
+ int err;
+ if (len < GROUP_FILTER_SIZE(0)) {
release_sock(sk);
+ return -EINVAL;
+ }
+ if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ err = ip_mc_gsfget(sk, &gsf,
+ (struct group_filter __user *)optval, optlen);
+ release_sock(sk);
+ return err;
+ }
+ case IP_PKTOPTIONS:
+ {
+ struct msghdr msg;
+
+ release_sock(sk);
- if (sk->sk_type != SOCK_STREAM)
- return -ENOPROTOOPT;
+ if (sk->sk_type != SOCK_STREAM)
+ return -ENOPROTOOPT;
- msg.msg_control = optval;
- msg.msg_controllen = len;
- msg.msg_flags = 0;
+ msg.msg_control = optval;
+ msg.msg_controllen = len;
+ msg.msg_flags = 0;
- if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
- struct in_pktinfo info;
+ if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
+ struct in_pktinfo info;
- info.ipi_addr.s_addr = inet->rcv_saddr;
- info.ipi_spec_dst.s_addr = inet->rcv_saddr;
- info.ipi_ifindex = inet->mc_index;
- put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
- }
- if (inet->cmsg_flags & IP_CMSG_TTL) {
- int hlim = inet->mc_ttl;
- put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
- }
- len -= msg.msg_controllen;
- return put_user(len, optlen);
+ info.ipi_addr.s_addr = inet->rcv_saddr;
+ info.ipi_spec_dst.s_addr = inet->rcv_saddr;
+ info.ipi_ifindex = inet->mc_index;
+ put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
- case IP_FREEBIND:
- val = inet->freebind;
- break;
- default:
- release_sock(sk);
- return -ENOPROTOOPT;
+ if (inet->cmsg_flags & IP_CMSG_TTL) {
+ int hlim = inet->mc_ttl;
+ put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
+ }
+ len -= msg.msg_controllen;
+ return put_user(len, optlen);
+ }
+ case IP_FREEBIND:
+ val = inet->freebind;
+ break;
+ default:
+ release_sock(sk);
+ return -ENOPROTOOPT;
}
release_sock(sk);
if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
unsigned char ucval = (unsigned char)val;
len = 1;
- if(put_user(len, optlen))
+ if (put_user(len, optlen))
return -EFAULT;
- if(copy_to_user(optval,&ucval,1))
+ if (copy_to_user(optval,&ucval,1))
return -EFAULT;
} else {
len = min_t(unsigned int, sizeof(int), len);
- if(put_user(len, optlen))
+ if (put_user(len, optlen))
return -EFAULT;
- if(copy_to_user(optval,&val,len))
+ if (copy_to_user(optval,&val,len))
return -EFAULT;
}
return 0;
}
int ip_getsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int __user *optlen)
+ int optname, char __user *optval, int __user *optlen)
{
int err;
@@ -1169,7 +1174,7 @@ int ip_getsockopt(struct sock *sk, int level,
) {
int len;
- if(get_user(len,optlen))
+ if (get_user(len,optlen))
return -EFAULT;
lock_sock(sk);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index aa704b88f014..ab86137c71d2 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -43,21 +43,15 @@ static LIST_HEAD(ipcomp_tfms_list);
static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
{
- int err, plen, dlen;
struct ipcomp_data *ipcd = x->data;
- u8 *start, *scratch;
- struct crypto_comp *tfm;
- int cpu;
-
- plen = skb->len;
- dlen = IPCOMP_SCRATCH_SIZE;
- start = skb->data;
+ const int plen = skb->len;
+ int dlen = IPCOMP_SCRATCH_SIZE;
+ const u8 *start = skb->data;
+ const int cpu = get_cpu();
+ u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+ struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+ int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
- cpu = get_cpu();
- scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
- tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
- err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
if (err)
goto out;
@@ -72,7 +66,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
skb->truesize += dlen - plen;
__skb_put(skb, dlen - plen);
- memcpy(skb->data, scratch, dlen);
+ skb_copy_to_linear_data(skb, scratch, dlen);
out:
put_cpu();
return err;
@@ -90,10 +84,10 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
/* Remove ipcomp header and decompress original payload */
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
ipch = (void *)skb->data;
iph->protocol = ipch->nexthdr;
- skb->h.raw = skb->nh.raw + sizeof(*ipch);
+ skb->transport_header = skb->network_header + sizeof(*ipch);
__skb_pull(skb, sizeof(*ipch));
err = ipcomp_decompress(x, skb);
@@ -103,23 +97,16 @@ out:
static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
{
- int err, plen, dlen, ihlen;
- struct iphdr *iph = skb->nh.iph;
struct ipcomp_data *ipcd = x->data;
- u8 *start, *scratch;
- struct crypto_comp *tfm;
- int cpu;
+ const int ihlen = ip_hdrlen(skb);
+ const int plen = skb->len - ihlen;
+ int dlen = IPCOMP_SCRATCH_SIZE;
+ u8 *start = skb->data + ihlen;
+ const int cpu = get_cpu();
+ u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+ struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+ int err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
- ihlen = iph->ihl * 4;
- plen = skb->len - ihlen;
- dlen = IPCOMP_SCRATCH_SIZE;
- start = skb->data + ihlen;
-
- cpu = get_cpu();
- scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
- tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
- err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
if (err)
goto out;
@@ -142,12 +129,11 @@ out:
static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
- struct iphdr *iph;
struct ip_comp_hdr *ipch;
struct ipcomp_data *ipcd = x->data;
int hdr_len = 0;
+ struct iphdr *iph = ip_hdr(skb);
- iph = skb->nh.iph;
iph->tot_len = htons(skb->len);
hdr_len = iph->ihl * 4;
if ((skb->len - hdr_len) < ipcd->threshold) {
@@ -159,7 +145,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
goto out_ok;
err = ipcomp_compress(x, skb);
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (err) {
goto out_ok;
@@ -188,8 +174,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
- if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
- skb->h.icmph->code != ICMP_FRAG_NEEDED)
+ if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
spi = htonl(ntohs(ipch->cpi));
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cf49de1a4983..342ca8d89458 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -192,7 +192,7 @@ static int __init ic_open_devs(void)
if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0)
printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name);
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if (dev == &loopback_dev)
continue;
if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
@@ -432,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
goto drop;
/* Basic sanity checks can be done without the lock. */
- rarp = (struct arphdr *)skb->h.raw;
+ rarp = (struct arphdr *)skb_transport_header(skb);
/* If this test doesn't pass, it's not IP, or we should
* ignore it anyway.
@@ -455,7 +455,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
goto drop;
/* OK, it is all there and looks valid, process... */
- rarp = (struct arphdr *)skb->h.raw;
+ rarp = (struct arphdr *)skb_transport_header(skb);
rarp_ptr = (unsigned char *) (rarp + 1);
/* One reply at a time, please. */
@@ -702,7 +702,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
memset(b, 0, sizeof(struct bootp_pkt));
/* Construct IP header */
- skb->nh.iph = h = &b->iph;
+ skb_reset_network_header(skb);
+ h = ip_hdr(skb);
h->version = 4;
h->ihl = 5;
h->tot_len = htons(sizeof(struct bootp_pkt));
@@ -782,7 +783,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
u8 *c;
printk("DHCP/BOOTP: Got extension %d:",*ext);
- for(c=ext+2; c<ext+2+ext[1]; c++)
+ for (c=ext+2; c<ext+2+ext[1]; c++)
printk(" %02x", *c);
printk("\n");
#endif
@@ -845,7 +846,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
sizeof(struct udphdr)))
goto drop;
- b = (struct bootp_pkt *) skb->nh.iph;
+ b = (struct bootp_pkt *)skb_network_header(skb);
h = &b->iph;
if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
@@ -883,7 +884,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
if (!pskb_may_pull(skb, skb->len))
goto drop;
- b = (struct bootp_pkt *) skb->nh.iph;
+ b = (struct bootp_pkt *)skb_network_header(skb);
h = &b->iph;
/* One reply at a time, please. */
@@ -938,7 +939,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
if (opt[1] >= 4)
memcpy(&server_id, opt + 2, 4);
break;
- };
+ }
}
#ifdef IPCONFIG_DEBUG
@@ -983,7 +984,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
ic_myaddr = NONE;
ic_servaddr = NONE;
goto drop_unlock;
- };
+ }
ic_dhcp_msgtype = mt;
@@ -1094,7 +1095,7 @@ static int __init ic_dynamic(void)
retries = CONF_SEND_RETRIES;
get_random_bytes(&timeout, sizeof(timeout));
timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
- for(;;) {
+ for (;;) {
#ifdef IPCONFIG_BOOTP
if (do_bootp && (d->able & IC_BOOTP))
ic_bootp_send_if(d, jiffies - start_jiffies);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3ec5ce0f5498..ebd2f2d532f6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -157,10 +157,10 @@ static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
return NULL;
}
-static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
{
- __be32 remote = t->parms.iph.daddr;
- __be32 local = t->parms.iph.saddr;
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
unsigned h = 0;
int prio = 0;
@@ -175,6 +175,10 @@ static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
return &tunnels[prio][h];
}
+static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+{
+ return __ipip_bucket(&t->parms);
+}
static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
@@ -206,19 +210,9 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
__be32 local = parms->iph.saddr;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
- unsigned h = 0;
- int prio = 0;
char name[IFNAMSIZ];
- if (remote) {
- prio |= 2;
- h ^= HASH(remote);
- }
- if (local) {
- prio |= 1;
- h ^= HASH(local);
- }
- for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
return t;
}
@@ -280,8 +274,8 @@ static int ipip_err(struct sk_buff *skb, u32 info)
ICMP in the real Internet is absolutely infeasible.
*/
struct iphdr *iph = (struct iphdr*)skb->data;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
int err;
@@ -336,8 +330,8 @@ out:
struct iphdr *iph = (struct iphdr*)dp;
int hlen = iph->ihl<<2;
struct iphdr *eiph;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
int rel_type = 0;
int rel_code = 0;
__be32 rel_info = 0;
@@ -354,7 +348,7 @@ out:
default:
return 0;
case ICMP_PARAMETERPROB:
- n = ntohl(skb->h.icmph->un.gateway) >> 24;
+ n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
if (n < hlen)
return 0;
@@ -373,7 +367,7 @@ out:
return 0;
case ICMP_FRAG_NEEDED:
/* And it is the only really necessary thing :-) */
- n = ntohs(skb->h.icmph->un.frag.mtu);
+ n = ntohs(icmp_hdr(skb)->un.frag.mtu);
if (n < hlen+68)
return 0;
n -= hlen;
@@ -405,7 +399,7 @@ out:
dst_release(skb2->dst);
skb2->dst = NULL;
skb_pull(skb2, skb->data - (u8*)eiph);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
/* Try to guess incoming interface */
memset(&fl, 0, sizeof(fl));
@@ -461,9 +455,10 @@ out:
#endif
}
-static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
+static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
+ struct sk_buff *skb)
{
- struct iphdr *inner_iph = skb->nh.iph;
+ struct iphdr *inner_iph = ip_hdr(skb);
if (INET_ECN_is_ce(outer_iph->tos))
IP_ECN_set_ce(inner_iph);
@@ -471,10 +466,8 @@ static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff
static int ipip_rcv(struct sk_buff *skb)
{
- struct iphdr *iph;
struct ip_tunnel *tunnel;
-
- iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
read_lock(&ipip_lock);
if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
@@ -486,8 +479,8 @@ static int ipip_rcv(struct sk_buff *skb)
secpath_reset(skb);
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
+ skb->mac_header = skb->network_header;
+ skb_reset_network_header(skb);
skb->protocol = htons(ETH_P_IP);
skb->pkt_type = PACKET_HOST;
@@ -521,7 +514,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
__be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
__be32 dst = tiph->daddr;
@@ -615,11 +608,12 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb_set_owner_w(new_skb, skb->sk);
dev_kfree_skb(skb);
skb = new_skb;
- old_iph = skb->nh.iph;
+ old_iph = ip_hdr(skb);
}
- skb->h.raw = skb->nh.raw;
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ skb->transport_header = skb->network_header;
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED);
@@ -630,7 +624,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
* Push down and install the IPIP header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
iph->frag_off = df;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 601e3df69258..0ebae413ae87 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -62,6 +62,7 @@
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
+#include <net/netlink.h>
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
@@ -302,8 +303,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
atomic_dec(&cache_resolve_queue_len);
- while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
- if (skb->nh.iph->version == 0) {
+ while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
nlh->nlmsg_type = NLMSG_ERROR;
nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -479,7 +480,7 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
static struct mfc_cache *ipmr_cache_alloc(void)
{
struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
- if(c==NULL)
+ if (c==NULL)
return NULL;
c->mfc_un.res.minvif = MAXVIFS;
return c;
@@ -488,7 +489,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
- if(c==NULL)
+ if (c==NULL)
return NULL;
skb_queue_head_init(&c->mfc_un.unres.unresolved);
c->mfc_un.unres.expires = jiffies + 10*HZ;
@@ -508,12 +509,13 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
* Play the pending entries through our router
*/
- while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
- if (skb->nh.iph->version == 0) {
+ while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
- nlh->nlmsg_len = skb->tail - (u8*)nlh;
+ nlh->nlmsg_len = (skb_tail_pointer(skb) -
+ (u8 *)nlh);
} else {
nlh->nlmsg_type = NLMSG_ERROR;
nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -539,7 +541,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
struct sk_buff *skb;
- int ihl = pkt->nh.iph->ihl<<2;
+ const int ihl = ip_hdrlen(pkt);
struct igmphdr *igmp;
struct igmpmsg *msg;
int ret;
@@ -551,7 +553,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
#endif
skb = alloc_skb(128, GFP_ATOMIC);
- if(!skb)
+ if (!skb)
return -ENOBUFS;
#ifdef CONFIG_IP_PIMSM
@@ -561,14 +563,17 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
And all this only to mangle msg->im_msgtype and
to set msg->im_mbz to "mbz" :-)
*/
- msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
- skb->nh.raw = skb->h.raw = (u8*)msg;
- memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ msg = (struct igmpmsg *)skb_network_header(skb);
+ memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
msg->im_msgtype = IGMPMSG_WHOLEPKT;
msg->im_mbz = 0;
msg->im_vif = reg_vif_num;
- skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
- skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
+ ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
+ ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
+ sizeof(struct iphdr));
} else
#endif
{
@@ -577,10 +582,11 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
* Copy the IP header
*/
- skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
- memcpy(skb->data,pkt->data,ihl);
- skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */
- msg = (struct igmpmsg*)skb->nh.iph;
+ skb->network_header = skb->tail;
+ skb_put(skb, ihl);
+ skb_copy_to_linear_data(skb, pkt->data, ihl);
+ ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
+ msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
skb->dst = dst_clone(pkt->dst);
@@ -592,8 +598,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
igmp->type =
msg->im_msgtype = assert;
igmp->code = 0;
- skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */
- skb->h.raw = skb->nh.raw;
+ ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
+ skb->transport_header = skb->network_header;
}
if (mroute_socket == NULL) {
@@ -622,11 +628,12 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
int err;
struct mfc_cache *c;
+ const struct iphdr *iph = ip_hdr(skb);
spin_lock_bh(&mfc_unres_lock);
for (c=mfc_unres_queue; c; c=c->next) {
- if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
- c->mfc_origin == skb->nh.iph->saddr)
+ if (c->mfc_mcastgrp == iph->daddr &&
+ c->mfc_origin == iph->saddr)
break;
}
@@ -646,9 +653,9 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
/*
* Fill in the new cache entry
*/
- c->mfc_parent=-1;
- c->mfc_origin=skb->nh.iph->saddr;
- c->mfc_mcastgrp=skb->nh.iph->daddr;
+ c->mfc_parent = -1;
+ c->mfc_origin = iph->saddr;
+ c->mfc_mcastgrp = iph->daddr;
/*
* Reflect first query at mrouted.
@@ -734,7 +741,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
return 0;
}
- if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
+ if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
return -EINVAL;
c=ipmr_cache_alloc();
@@ -788,7 +795,7 @@ static void mroute_clean_tables(struct sock *sk)
/*
* Shut down all active vif entries
*/
- for(i=0; i<maxvif; i++) {
+ for (i=0; i<maxvif; i++) {
if (!(vif_table[i].flags&VIFF_STATIC))
vif_delete(i);
}
@@ -858,119 +865,117 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
struct vifctl vif;
struct mfcctl mfc;
- if(optname!=MRT_INIT)
- {
- if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
+ if (optname != MRT_INIT) {
+ if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
return -EACCES;
}
- switch(optname)
- {
- case MRT_INIT:
- if (sk->sk_type != SOCK_RAW ||
- inet_sk(sk)->num != IPPROTO_IGMP)
- return -EOPNOTSUPP;
- if(optlen!=sizeof(int))
- return -ENOPROTOOPT;
-
- rtnl_lock();
- if (mroute_socket) {
- rtnl_unlock();
- return -EADDRINUSE;
- }
-
- ret = ip_ra_control(sk, 1, mrtsock_destruct);
- if (ret == 0) {
- write_lock_bh(&mrt_lock);
- mroute_socket=sk;
- write_unlock_bh(&mrt_lock);
+ switch (optname) {
+ case MRT_INIT:
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->num != IPPROTO_IGMP)
+ return -EOPNOTSUPP;
+ if (optlen!=sizeof(int))
+ return -ENOPROTOOPT;
- ipv4_devconf.mc_forwarding++;
- }
+ rtnl_lock();
+ if (mroute_socket) {
rtnl_unlock();
- return ret;
- case MRT_DONE:
- if (sk!=mroute_socket)
- return -EACCES;
- return ip_ra_control(sk, 0, NULL);
- case MRT_ADD_VIF:
- case MRT_DEL_VIF:
- if(optlen!=sizeof(vif))
- return -EINVAL;
- if (copy_from_user(&vif,optval,sizeof(vif)))
- return -EFAULT;
- if(vif.vifc_vifi >= MAXVIFS)
- return -ENFILE;
- rtnl_lock();
- if (optname==MRT_ADD_VIF) {
- ret = vif_add(&vif, sk==mroute_socket);
- } else {
- ret = vif_delete(vif.vifc_vifi);
- }
- rtnl_unlock();
- return ret;
+ return -EADDRINUSE;
+ }
+
+ ret = ip_ra_control(sk, 1, mrtsock_destruct);
+ if (ret == 0) {
+ write_lock_bh(&mrt_lock);
+ mroute_socket=sk;
+ write_unlock_bh(&mrt_lock);
+
+ ipv4_devconf.mc_forwarding++;
+ }
+ rtnl_unlock();
+ return ret;
+ case MRT_DONE:
+ if (sk!=mroute_socket)
+ return -EACCES;
+ return ip_ra_control(sk, 0, NULL);
+ case MRT_ADD_VIF:
+ case MRT_DEL_VIF:
+ if (optlen!=sizeof(vif))
+ return -EINVAL;
+ if (copy_from_user(&vif,optval,sizeof(vif)))
+ return -EFAULT;
+ if (vif.vifc_vifi >= MAXVIFS)
+ return -ENFILE;
+ rtnl_lock();
+ if (optname==MRT_ADD_VIF) {
+ ret = vif_add(&vif, sk==mroute_socket);
+ } else {
+ ret = vif_delete(vif.vifc_vifi);
+ }
+ rtnl_unlock();
+ return ret;
/*
* Manipulate the forwarding caches. These live
* in a sort of kernel/user symbiosis.
*/
- case MRT_ADD_MFC:
- case MRT_DEL_MFC:
- if(optlen!=sizeof(mfc))
- return -EINVAL;
- if (copy_from_user(&mfc,optval, sizeof(mfc)))
- return -EFAULT;
- rtnl_lock();
- if (optname==MRT_DEL_MFC)
- ret = ipmr_mfc_delete(&mfc);
- else
- ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
- rtnl_unlock();
- return ret;
+ case MRT_ADD_MFC:
+ case MRT_DEL_MFC:
+ if (optlen!=sizeof(mfc))
+ return -EINVAL;
+ if (copy_from_user(&mfc,optval, sizeof(mfc)))
+ return -EFAULT;
+ rtnl_lock();
+ if (optname==MRT_DEL_MFC)
+ ret = ipmr_mfc_delete(&mfc);
+ else
+ ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
+ rtnl_unlock();
+ return ret;
/*
* Control PIM assert.
*/
- case MRT_ASSERT:
- {
- int v;
- if(get_user(v,(int __user *)optval))
- return -EFAULT;
- mroute_do_assert=(v)?1:0;
- return 0;
- }
+ case MRT_ASSERT:
+ {
+ int v;
+ if (get_user(v,(int __user *)optval))
+ return -EFAULT;
+ mroute_do_assert=(v)?1:0;
+ return 0;
+ }
#ifdef CONFIG_IP_PIMSM
- case MRT_PIM:
- {
- int v, ret;
- if(get_user(v,(int __user *)optval))
- return -EFAULT;
- v = (v)?1:0;
- rtnl_lock();
- ret = 0;
- if (v != mroute_do_pim) {
- mroute_do_pim = v;
- mroute_do_assert = v;
+ case MRT_PIM:
+ {
+ int v, ret;
+ if (get_user(v,(int __user *)optval))
+ return -EFAULT;
+ v = (v)?1:0;
+ rtnl_lock();
+ ret = 0;
+ if (v != mroute_do_pim) {
+ mroute_do_pim = v;
+ mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
- if (mroute_do_pim)
- ret = inet_add_protocol(&pim_protocol,
- IPPROTO_PIM);
- else
- ret = inet_del_protocol(&pim_protocol,
- IPPROTO_PIM);
- if (ret < 0)
- ret = -EAGAIN;
+ if (mroute_do_pim)
+ ret = inet_add_protocol(&pim_protocol,
+ IPPROTO_PIM);
+ else
+ ret = inet_del_protocol(&pim_protocol,
+ IPPROTO_PIM);
+ if (ret < 0)
+ ret = -EAGAIN;
#endif
- }
- rtnl_unlock();
- return ret;
}
+ rtnl_unlock();
+ return ret;
+ }
#endif
- /*
- * Spurious command, or MRT_VERSION which you cannot
- * set.
- */
- default:
- return -ENOPROTOOPT;
+ /*
+ * Spurious command, or MRT_VERSION which you cannot
+ * set.
+ */
+ default:
+ return -ENOPROTOOPT;
}
}
@@ -983,7 +988,7 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
int olr;
int val;
- if(optname!=MRT_VERSION &&
+ if (optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
optname!=MRT_PIM &&
#endif
@@ -997,17 +1002,17 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
if (olr < 0)
return -EINVAL;
- if(put_user(olr,optlen))
+ if (put_user(olr,optlen))
return -EFAULT;
- if(optname==MRT_VERSION)
+ if (optname==MRT_VERSION)
val=0x0305;
#ifdef CONFIG_IP_PIMSM
- else if(optname==MRT_PIM)
+ else if (optname==MRT_PIM)
val=mroute_do_pim;
#endif
else
val=mroute_do_assert;
- if(copy_to_user(optval,&val,olr))
+ if (copy_to_user(optval,&val,olr))
return -EFAULT;
return 0;
}
@@ -1023,48 +1028,47 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
struct vif_device *vif;
struct mfc_cache *c;
- switch(cmd)
- {
- case SIOCGETVIFCNT:
- if (copy_from_user(&vr,arg,sizeof(vr)))
- return -EFAULT;
- if(vr.vifi>=maxvif)
- return -EINVAL;
- read_lock(&mrt_lock);
- vif=&vif_table[vr.vifi];
- if(VIF_EXISTS(vr.vifi)) {
- vr.icount=vif->pkt_in;
- vr.ocount=vif->pkt_out;
- vr.ibytes=vif->bytes_in;
- vr.obytes=vif->bytes_out;
- read_unlock(&mrt_lock);
-
- if (copy_to_user(arg,&vr,sizeof(vr)))
- return -EFAULT;
- return 0;
- }
+ switch (cmd) {
+ case SIOCGETVIFCNT:
+ if (copy_from_user(&vr,arg,sizeof(vr)))
+ return -EFAULT;
+ if (vr.vifi>=maxvif)
+ return -EINVAL;
+ read_lock(&mrt_lock);
+ vif=&vif_table[vr.vifi];
+ if (VIF_EXISTS(vr.vifi)) {
+ vr.icount=vif->pkt_in;
+ vr.ocount=vif->pkt_out;
+ vr.ibytes=vif->bytes_in;
+ vr.obytes=vif->bytes_out;
read_unlock(&mrt_lock);
- return -EADDRNOTAVAIL;
- case SIOCGETSGCNT:
- if (copy_from_user(&sr,arg,sizeof(sr)))
- return -EFAULT;
- read_lock(&mrt_lock);
- c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
- if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
-
- if (copy_to_user(arg,&sr,sizeof(sr)))
- return -EFAULT;
- return 0;
- }
+ if (copy_to_user(arg,&vr,sizeof(vr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ case SIOCGETSGCNT:
+ if (copy_from_user(&sr,arg,sizeof(sr)))
+ return -EFAULT;
+
+ read_lock(&mrt_lock);
+ c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
+ if (c) {
+ sr.pktcnt = c->mfc_un.res.pkt;
+ sr.bytecnt = c->mfc_un.res.bytes;
+ sr.wrong_if = c->mfc_un.res.wrong_if;
read_unlock(&mrt_lock);
- return -EADDRNOTAVAIL;
- default:
- return -ENOIOCTLCMD;
+
+ if (copy_to_user(arg,&sr,sizeof(sr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ default:
+ return -ENOIOCTLCMD;
}
}
@@ -1076,7 +1080,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
v=&vif_table[0];
- for(ct=0;ct<maxvif;ct++,v++) {
+ for (ct=0;ct<maxvif;ct++,v++) {
if (v->dev==ptr)
vif_delete(ct);
}
@@ -1096,11 +1100,17 @@ static struct notifier_block ip_mr_notifier={
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
- struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
+ struct iphdr *iph;
+ struct iphdr *old_iph = ip_hdr(skb);
+
+ skb_push(skb, sizeof(struct iphdr));
+ skb->transport_header = skb->network_header;
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
iph->version = 4;
- iph->tos = skb->nh.iph->tos;
- iph->ttl = skb->nh.iph->ttl;
+ iph->tos = old_iph->tos;
+ iph->ttl = old_iph->ttl;
iph->frag_off = 0;
iph->daddr = daddr;
iph->saddr = saddr;
@@ -1110,8 +1120,6 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
ip_select_ident(iph, skb->dst, NULL);
ip_send_check(iph);
- skb->h.ipiph = skb->nh.iph;
- skb->nh.iph = iph;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
nf_reset(skb);
}
@@ -1134,7 +1142,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &vif_table[vifi];
struct net_device *dev;
struct rtable *rt;
@@ -1200,8 +1208,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
dst_release(skb->dst);
skb->dst = &rt->u.dst;
- iph = skb->nh.iph;
- ip_decrease_ttl(iph);
+ ip_decrease_ttl(ip_hdr(skb));
/* FIXME: forward and output firewalls used to be called here.
* What do we do with netfilter? -- RR */
@@ -1301,7 +1308,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
* Forward the frame
*/
for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
- if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
+ if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
@@ -1347,7 +1354,7 @@ int ip_mr_input(struct sk_buff *skb)
if (IPCB(skb)->opt.router_alert) {
if (ip_call_ra_chain(skb))
return 0;
- } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
+ } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
/* IGMPv1 (and broken IGMPv2 implementations sort of
Cisco IOS <= 11.2(8)) do not put router alert
option to IGMP packets destined to routable
@@ -1366,7 +1373,7 @@ int ip_mr_input(struct sk_buff *skb)
}
read_lock(&mrt_lock);
- cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
+ cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
/*
* No usable cache entry
@@ -1426,14 +1433,15 @@ int pim_rcv_v1(struct sk_buff * skb)
if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
goto drop;
- pim = (struct igmphdr*)skb->h.raw;
+ pim = igmp_hdr(skb);
if (!mroute_do_pim ||
skb->len < sizeof(*pim) + sizeof(*encap) ||
pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
goto drop;
- encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
+ encap = (struct iphdr *)(skb_transport_header(skb) +
+ sizeof(struct igmphdr));
/*
Check that:
a. packet is really destinted to a multicast group
@@ -1455,9 +1463,9 @@ int pim_rcv_v1(struct sk_buff * skb)
if (reg_dev == NULL)
goto drop;
- skb->mac.raw = skb->nh.raw;
+ skb->mac_header = skb->network_header;
skb_pull(skb, (u8*)encap - skb->data);
- skb->nh.iph = (struct iphdr *)skb->data;
+ skb_reset_network_header(skb);
skb->dev = reg_dev;
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = 0;
@@ -1486,7 +1494,7 @@ static int pim_rcv(struct sk_buff * skb)
if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
goto drop;
- pim = (struct pimreghdr*)skb->h.raw;
+ pim = (struct pimreghdr *)skb_transport_header(skb);
if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
(pim->flags&PIM_NULL_REGISTER) ||
(ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
@@ -1494,7 +1502,8 @@ static int pim_rcv(struct sk_buff * skb)
goto drop;
/* check if the inner packet is destined to mcast group */
- encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
+ encap = (struct iphdr *)(skb_transport_header(skb) +
+ sizeof(struct pimreghdr));
if (!MULTICAST(encap->daddr) ||
encap->tot_len == 0 ||
ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
@@ -1510,9 +1519,9 @@ static int pim_rcv(struct sk_buff * skb)
if (reg_dev == NULL)
goto drop;
- skb->mac.raw = skb->nh.raw;
+ skb->mac_header = skb->network_header;
skb_pull(skb, (u8*)encap - skb->data);
- skb->nh.iph = (struct iphdr *)skb->data;
+ skb_reset_network_header(skb);
skb->dev = reg_dev;
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = 0;
@@ -1537,7 +1546,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
int ct;
struct rtnexthop *nhp;
struct net_device *dev = vif_table[c->mfc_parent].dev;
- u8 *b = skb->tail;
+ u8 *b = skb_tail_pointer(skb);
struct rtattr *mp_head;
if (dev)
@@ -1557,12 +1566,12 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
}
}
mp_head->rta_type = RTA_MULTIPATH;
- mp_head->rta_len = skb->tail - (u8*)mp_head;
+ mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
rtm->rtm_type = RTN_MULTICAST;
return 1;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -EMSGSIZE;
}
@@ -1577,6 +1586,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
if (cache==NULL) {
struct sk_buff *skb2;
+ struct iphdr *iph;
struct net_device *dev;
int vif;
@@ -1596,11 +1606,13 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
return -ENOMEM;
}
- skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
- skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
- skb2->nh.iph->saddr = rt->rt_src;
- skb2->nh.iph->daddr = rt->rt_dst;
- skb2->nh.iph->version = 0;
+ skb_push(skb2, sizeof(struct iphdr));
+ skb_reset_network_header(skb2);
+ iph = ip_hdr(skb2);
+ iph->ihl = sizeof(struct iphdr) >> 2;
+ iph->saddr = rt->rt_src;
+ iph->daddr = rt->rt_dst;
+ iph->version = 0;
err = ipmr_cache_unresolved(vif, skb2);
read_unlock(&mrt_lock);
return err;
@@ -1625,7 +1637,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
loff_t pos)
{
for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
- if(!VIF_EXISTS(iter->ct))
+ if (!VIF_EXISTS(iter->ct))
continue;
if (pos-- == 0)
return &vif_table[iter->ct];
@@ -1649,7 +1661,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return ipmr_vif_seq_idx(iter, 0);
while (++iter->ct < maxvif) {
- if(!VIF_EXISTS(iter->ct))
+ if (!VIF_EXISTS(iter->ct))
continue;
return &vif_table[iter->ct];
}
@@ -1680,7 +1692,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations ipmr_vif_seq_ops = {
+static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
.next = ipmr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
@@ -1732,14 +1744,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
it->cache = mfc_cache_array;
read_lock(&mrt_lock);
for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
- for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
+ for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
if (pos-- == 0)
return mfc;
read_unlock(&mrt_lock);
it->cache = &mfc_unres_queue;
spin_lock_bh(&mfc_unres_lock);
- for(mfc = mfc_unres_queue; mfc; mfc = mfc->next)
+ for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
if (pos-- == 0)
return mfc;
spin_unlock_bh(&mfc_unres_lock);
@@ -1829,9 +1841,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
mfc->mfc_un.res.wrong_if);
if (it->cache != &mfc_unres_queue) {
- for(n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++ ) {
- if(VIF_EXISTS(n)
+ for (n = mfc->mfc_un.res.minvif;
+ n < mfc->mfc_un.res.maxvif; n++ ) {
+ if (VIF_EXISTS(n)
&& mfc->mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
@@ -1843,7 +1855,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations ipmr_mfc_seq_ops = {
+static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
.next = ipmr_mfc_seq_next,
.stop = ipmr_mfc_seq_stop,
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 22e104c6a493..15ad5dd2d984 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -331,14 +331,14 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
struct ip_vs_app *app)
{
int diff;
- unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+ const unsigned int tcp_offset = ip_hdrlen(*pskb);
struct tcphdr *th;
__u32 seq;
if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -406,14 +406,14 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
struct ip_vs_app *app)
{
int diff;
- unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+ const unsigned int tcp_offset = ip_hdrlen(*pskb);
struct tcphdr *th;
__u32 seq;
if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -577,7 +577,6 @@ static const struct file_operations ip_vs_app_fops = {
int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
char *o_buf, int o_len, char *n_buf, int n_len)
{
- struct iphdr *iph;
int diff;
int o_offset;
int o_left;
@@ -603,12 +602,11 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
skb_put(skb, diff);
memmove(skb->data + o_offset + n_len,
skb->data + o_offset + o_len, o_left);
- memcpy(skb->data + o_offset, n_buf, n_len);
+ skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
}
/* must update the iph total length here */
- iph = skb->nh.iph;
- iph->tot_len = htons(skb->len);
+ ip_hdr(skb)->tot_len = htons(skb->len);
LeaveFunction(9);
return 0;
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 24d7b66eb6d2..f005a2f929f4 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -212,7 +212,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
__be16 ports[2])
{
struct ip_vs_conn *cp = NULL;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
struct ip_vs_dest *dest;
struct ip_vs_conn *ct;
__be16 dport; /* destination port to forward */
@@ -381,7 +381,7 @@ struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_conn *cp = NULL;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
@@ -447,7 +447,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
__be16 _ports[2], *pptr;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
pptr = skb_header_pointer(skb, iph->ihl*4,
sizeof(_ports), _ports);
@@ -546,7 +546,7 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
skb = ip_defrag(skb, user);
if (skb)
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
return skb;
}
@@ -557,9 +557,10 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int inout)
{
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
unsigned int icmp_offset = iph->ihl*4;
- struct icmphdr *icmph = (struct icmphdr *)(skb->nh.raw + icmp_offset);
+ struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) +
+ icmp_offset);
struct iphdr *ciph = (struct iphdr *)(icmph + 1);
if (inout) {
@@ -617,14 +618,14 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
*related = 1;
/* reassemble IP fragments */
- if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
if (!skb)
return NF_STOLEN;
*pskb = skb;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
offset = ihl = iph->ihl * 4;
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
if (ic == NULL)
@@ -659,7 +660,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
@@ -680,8 +681,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
}
/* Ensure the checksum is correct */
- if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
- ip_vs_checksum_complete(skb, ihl)) {
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
/* Failed checksum! */
IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
NIPQUAD(iph->saddr));
@@ -712,8 +712,7 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
{
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
return th->rst;
@@ -740,14 +739,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
if (skb->ipvs_property)
return NF_ACCEPT;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_out_icmp(pskb, &related);
if (related)
return verdict;
skb = *pskb;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
}
pp = ip_vs_proto_get(iph->protocol);
@@ -755,12 +754,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
return NF_ACCEPT;
/* reassemble IP fragments */
- if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
+ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) {
skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
if (!skb)
return NF_STOLEN;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
*pskb = skb;
}
@@ -810,8 +809,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
goto drop;
skb = *pskb;
- skb->nh.iph->saddr = cp->vaddr;
- ip_send_check(skb->nh.iph);
+ ip_hdr(skb)->saddr = cp->vaddr;
+ ip_send_check(ip_hdr(skb));
/* For policy routing, packets originating from this
* machine itself may be routed differently to packets
@@ -861,7 +860,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
*related = 1;
/* reassemble IP fragments */
- if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
skb = ip_vs_gather_frags(skb,
hooknum == NF_IP_LOCAL_IN ?
IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
@@ -870,7 +869,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
*pskb = skb;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
offset = ihl = iph->ihl * 4;
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
if (ic == NULL)
@@ -905,7 +904,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
@@ -921,8 +920,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
verdict = NF_DROP;
/* Ensure the checksum is correct */
- if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
- ip_vs_checksum_complete(skb, ihl)) {
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
/* Failed checksum! */
IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
NIPQUAD(iph->saddr));
@@ -966,19 +964,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
|| skb->dev == &loopback_dev || skb->sk)) {
IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
skb->pkt_type,
- skb->nh.iph->protocol,
- NIPQUAD(skb->nh.iph->daddr));
+ ip_hdr(skb)->protocol,
+ NIPQUAD(ip_hdr(skb)->daddr));
return NF_ACCEPT;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
if (related)
return verdict;
skb = *pskb;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
}
/* Protocol supported? */
@@ -1064,7 +1062,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
{
int r;
- if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
+ if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
return NF_ACCEPT;
return ip_vs_in_icmp(pskb, &r, hooknum);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index 502111fba872..dcf5d46aaa5e 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -204,7 +204,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
struct ip_vs_dh_bucket *tbl;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 847c47af040c..344ddbbdc756 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -159,10 +159,10 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 0;
if (cp->app_data == &ip_vs_ftp_pasv) {
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
data = (char *)th + (th->doff << 2);
- data_limit = (*pskb)->tail;
+ data_limit = skb_tail_pointer(*pskb);
if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING,
@@ -262,14 +262,14 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Detecting whether it is passive
*/
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
/* Since there may be OPTIONS in the TCP packet and the HLEN is
the length of the header in 32-bit multiples, it is accurate
to calculate data address by th+HLEN*4 */
data = data_start = (char *)th + (th->doff << 2);
- data_limit = (*pskb)->tail;
+ data_limit = skb_tail_pointer(*pskb);
while (data <= data_limit - 6) {
if (strnicmp(data, "PASV\r\n", 6) == 0) {
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index c801273cb881..052f4ed59174 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -521,7 +521,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_dest *dest;
struct ip_vs_lblc_table *tbl;
struct ip_vs_lblc_entry *en;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 23f9b9e73c85..6225acac7a3b 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -775,7 +775,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_dest *dest;
struct ip_vs_lblcr_table *tbl;
struct ip_vs_lblcr_entry *en;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 8b0505b09317..a842676e1c69 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -52,15 +52,15 @@ ah_conn_in_get(const struct sk_buff *skb,
if (likely(!inverse)) {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->saddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->daddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->daddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->saddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
}
if (!cp) {
@@ -89,15 +89,15 @@ ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
if (likely(!inverse)) {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->saddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->daddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->daddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->saddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
}
if (!cp) {
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 16a9ebee2fe6..e65577a77006 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -76,16 +76,15 @@ tcp_conn_schedule(struct sk_buff *skb,
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL) {
*verdict = NF_DROP;
return 0;
}
if (th->syn &&
- (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
- skb->nh.iph->daddr, th->dest))) {
+ (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+ ip_hdr(skb)->daddr, th->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -127,7 +126,7 @@ tcp_snat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+ const unsigned int tcphoff = ip_hdrlen(*pskb);
/* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -143,7 +142,7 @@ tcp_snat_handler(struct sk_buff **pskb,
return 0;
}
- tcph = (void *)(*pskb)->nh.iph + tcphoff;
+ tcph = (void *)ip_hdr(*pskb) + tcphoff;
tcph->source = cp->vport;
/* Adjust TCP checksums */
@@ -175,7 +174,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+ const unsigned int tcphoff = ip_hdrlen(*pskb);
/* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -194,7 +193,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
return 0;
}
- tcph = (void *)(*pskb)->nh.iph + tcphoff;
+ tcph = (void *)ip_hdr(*pskb) + tcphoff;
tcph->dest = cp->dport;
/*
@@ -224,15 +223,15 @@ tcp_dnat_handler(struct sk_buff **pskb,
static int
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
- unsigned int tcphoff = skb->nh.iph->ihl*4;
+ const unsigned int tcphoff = ip_hdrlen(skb);
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
case CHECKSUM_COMPLETE:
- if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
skb->len - tcphoff,
- skb->nh.iph->protocol, skb->csum)) {
+ ip_hdr(skb)->protocol, skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
return 0;
@@ -467,8 +466,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
{
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 03f0a414cfa4..8ee5fe6a101d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -22,7 +22,7 @@
#include <linux/udp.h>
#include <net/ip_vs.h>
-
+#include <net/ip.h>
static struct ip_vs_conn *
udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
@@ -56,7 +56,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp;
__be16 _ports[2], *pptr;
- pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ pptr = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
@@ -82,15 +82,15 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
- uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ uh = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_udph), &_udph);
if (uh == NULL) {
*verdict = NF_DROP;
return 0;
}
- if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
- skb->nh.iph->daddr, uh->dest))) {
+ if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+ ip_hdr(skb)->daddr, uh->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -133,7 +133,7 @@ udp_snat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+ const unsigned int udphoff = ip_hdrlen(*pskb);
/* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -151,7 +151,7 @@ udp_snat_handler(struct sk_buff **pskb,
return 0;
}
- udph = (void *)(*pskb)->nh.iph + udphoff;
+ udph = (void *)ip_hdr(*pskb) + udphoff;
udph->source = cp->vport;
/*
@@ -187,7 +187,7 @@ udp_dnat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+ unsigned int udphoff = ip_hdrlen(*pskb);
/* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -206,7 +206,7 @@ udp_dnat_handler(struct sk_buff **pskb,
return 0;
}
- udph = (void *)(*pskb)->nh.iph + udphoff;
+ udph = (void *)ip_hdr(*pskb) + udphoff;
udph->dest = cp->dport;
/*
@@ -239,7 +239,7 @@ static int
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
struct udphdr _udph, *uh;
- unsigned int udphoff = skb->nh.iph->ihl*4;
+ const unsigned int udphoff = ip_hdrlen(skb);
uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
if (uh == NULL)
@@ -251,10 +251,10 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
case CHECKSUM_COMPLETE:
- if (csum_tcpudp_magic(skb->nh.iph->saddr,
- skb->nh.iph->daddr,
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
skb->len - udphoff,
- skb->nh.iph->protocol,
+ ip_hdr(skb)->protocol,
skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 338668f88fe2..1b25b00ef1e1 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -201,7 +201,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
struct ip_vs_sh_bucket *tbl;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index e1f77bd7c9a5..900ce29db382 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -156,7 +156,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
struct rtable *rt; /* Route to the other host */
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
u8 tos = iph->tos;
int mtu;
struct flowi fl = {
@@ -178,7 +178,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
@@ -193,7 +193,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_rt_put(rt);
return NF_STOLEN;
}
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
/* drop old route */
dst_release(skb->dst);
@@ -226,7 +226,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
{
struct rtable *rt; /* Route to the other host */
int mtu;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
EnterFunction(10);
@@ -245,7 +245,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
@@ -266,8 +266,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
goto tx_error;
- skb->nh.iph->daddr = cp->daddr;
- ip_send_check(skb->nh.iph);
+ ip_hdr(skb)->daddr = cp->daddr;
+ ip_send_check(ip_hdr(skb));
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
@@ -320,19 +320,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
{
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *old_iph = ip_hdr(skb);
u8 tos = old_iph->tos;
__be16 df = old_iph->frag_off;
+ sk_buff_data_t old_transport_header = skb->transport_header;
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
int mtu;
EnterFunction(10);
- if (skb->protocol != __constant_htons(ETH_P_IP)) {
+ if (skb->protocol != htons(ETH_P_IP)) {
IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
"ETH_P_IP: %d, skb protocol: %d\n",
- __constant_htons(ETH_P_IP), skb->protocol);
+ htons(ETH_P_IP), skb->protocol);
goto tx_error;
}
@@ -350,9 +351,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (skb->dst)
skb->dst->ops->update_pmtu(skb->dst, mtu);
- df |= (old_iph->frag_off&__constant_htons(IP_DF));
+ df |= (old_iph->frag_off & htons(IP_DF));
- if ((old_iph->frag_off&__constant_htons(IP_DF))
+ if ((old_iph->frag_off & htons(IP_DF))
&& mtu < ntohs(old_iph->tot_len)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
@@ -377,15 +378,16 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
kfree_skb(skb);
skb = new_skb;
- old_iph = skb->nh.iph;
+ old_iph = ip_hdr(skb);
}
- skb->h.raw = (void *) old_iph;
+ skb->transport_header = old_transport_header;
/* fix old IP header checksum */
ip_send_check(old_iph);
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
/* drop old route */
@@ -395,7 +397,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/*
* Push down and install the IPIP header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
iph->frag_off = df;
@@ -435,7 +437,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
struct rtable *rt; /* Route to the other host */
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
int mtu;
EnterFunction(10);
@@ -445,7 +447,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
+ if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
@@ -460,7 +462,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_rt_put(rt);
return NF_STOLEN;
}
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
/* drop old route */
dst_release(skb->dst);
@@ -514,12 +516,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
* mangle and send the packet here (only for VS/NAT)
*/
- if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos))))
+ if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
goto tx_error_icmp;
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) {
+ if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index 574c735836fc..b03c5ca2c823 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -100,7 +100,7 @@ static int drr_dev_event(struct notifier_block *this,
spin_unlock_bh(&state_lock);
break;
- };
+ }
return NOTIFY_DONE;
}
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6069a11514f6..b44192924f95 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -10,7 +10,7 @@
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
{
- struct iphdr *iph = (*pskb)->nh.iph;
+ const struct iphdr *iph = ip_hdr(*pskb);
struct rtable *rt;
struct flowi fl = {};
struct dst_entry *odst;
@@ -142,7 +142,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
struct ip_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP_LOCAL_OUT) {
- const struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
rt_info->tos = iph->tos;
rt_info->daddr = iph->daddr;
@@ -155,7 +155,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
const struct ip_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP_LOCAL_OUT) {
- struct iphdr *iph = (*pskb)->nh.iph;
+ const struct iphdr *iph = ip_hdr(*pskb);
if (!(iph->tos == rt_info->tos
&& iph->daddr == rt_info->daddr
@@ -168,7 +168,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 601808c796ec..46509fae9fd8 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -30,188 +30,6 @@ config NF_CONNTRACK_PROC_COMPAT
If unsure, say Y.
-# connection tracking, helpers and protocols
-config IP_NF_CT_ACCT
- bool "Connection tracking flow accounting"
- depends on IP_NF_CONNTRACK
- help
- If this option is enabled, the connection tracking code will
- keep per-flow packet and byte counters.
-
- Those counters can be used for flow-based accounting or the
- `connbytes' match.
-
- If unsure, say `N'.
-
-config IP_NF_CONNTRACK_MARK
- bool 'Connection mark tracking support'
- depends on IP_NF_CONNTRACK
- help
- This option enables support for connection marks, used by the
- `CONNMARK' target and `connmark' match. Similar to the mark value
- of packets, but this mark value is kept in the conntrack session
- instead of the individual packets.
-
-config IP_NF_CONNTRACK_SECMARK
- bool 'Connection tracking security mark support'
- depends on IP_NF_CONNTRACK && NETWORK_SECMARK
- help
- This option enables security markings to be applied to
- connections. Typically they are copied to connections from
- packets using the CONNSECMARK target and copied back from
- connections to packets with the same target, with the packets
- being originally labeled via SECMARK.
-
- If unsure, say 'N'.
-
-config IP_NF_CONNTRACK_EVENTS
- bool "Connection tracking events (EXPERIMENTAL)"
- depends on EXPERIMENTAL && IP_NF_CONNTRACK
- help
- If this option is enabled, the connection tracking code will
- provide a notifier chain that can be used by other kernel code
- to get notified about changes in the connection tracking state.
-
- IF unsure, say `N'.
-
-config IP_NF_CONNTRACK_NETLINK
- tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
- depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
- depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
- depends on IP_NF_NAT=n || IP_NF_NAT
- help
- This option enables support for a netlink-based userspace interface
-
-
-config IP_NF_CT_PROTO_SCTP
- tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
- help
- With this option enabled, the connection tracking code will
- be able to do state tracking on SCTP connections.
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
-
-config IP_NF_FTP
- tristate "FTP protocol support"
- depends on IP_NF_CONNTRACK
- help
- Tracking FTP connections is problematic: special helpers are
- required for tracking them, and doing masquerading and other forms
- of Network Address Translation on them.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_IRC
- tristate "IRC protocol support"
- depends on IP_NF_CONNTRACK
- ---help---
- There is a commonly-used extension to IRC called
- Direct Client-to-Client Protocol (DCC). This enables users to send
- files to each other, and also chat to each other without the need
- of a server. DCC Sending is used anywhere you send files over IRC,
- and DCC Chat is most commonly used by Eggdrop bots. If you are
- using NAT, this extension will enable you to send files and initiate
- chats. Note that you do NOT need this extension to get files or
- have others initiate chats, or everything else in IRC.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_NETBIOS_NS
- tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
- help
- NetBIOS name service requests are sent as broadcast messages from an
- unprivileged port and responded to with unicast messages to the
- same port. This make them hard to firewall properly because connection
- tracking doesn't deal with broadcasts. This helper tracks locally
- originating NetBIOS name service requests and the corresponding
- responses. It relies on correct IP address configuration, specifically
- netmask and broadcast address. When properly configured, the output
- of "ip address show" should look similar to this:
-
- $ ip -4 address show eth0
- 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
- inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_TFTP
- tristate "TFTP protocol support"
- depends on IP_NF_CONNTRACK
- help
- TFTP connection tracking helper, this is required depending
- on how restrictive your ruleset is.
- If you are using a tftp client behind -j SNAT or -j MASQUERADING
- you will need this.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_AMANDA
- tristate "Amanda backup protocol support"
- depends on IP_NF_CONNTRACK
- select TEXTSEARCH
- select TEXTSEARCH_KMP
- help
- If you are running the Amanda backup package <http://www.amanda.org/>
- on this machine or machines that will be MASQUERADED through this
- machine, then you may want to enable this feature. This allows the
- connection tracking and natting code to allow the sub-channels that
- Amanda requires for communication of the backup data, messages and
- index.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_PPTP
- tristate 'PPTP protocol support'
- depends on IP_NF_CONNTRACK
- help
- This module adds support for PPTP (Point to Point Tunnelling
- Protocol, RFC2637) connection tracking and NAT.
-
- If you are running PPTP sessions over a stateful firewall or NAT
- box, you may want to enable this feature.
-
- Please note that not all PPTP modes of operation are supported yet.
- For more info, read top of the file
- net/ipv4/netfilter/ip_conntrack_pptp.c
-
- If you want to compile it as a module, say M here and read
- Documentation/modules.txt. If unsure, say `N'.
-
-config IP_NF_H323
- tristate 'H.323 protocol support (EXPERIMENTAL)'
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
- help
- H.323 is a VoIP signalling protocol from ITU-T. As one of the most
- important VoIP protocols, it is widely used by voice hardware and
- software including voice gateways, IP phones, Netmeeting, OpenPhone,
- Gnomemeeting, etc.
-
- With this module you can support H.323 on a connection tracking/NAT
- firewall.
-
- This module supports RAS, Fast Start, H.245 Tunnelling, Call
- Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat,
- whiteboard, file transfer, etc. For more information, please
- visit http://nath323.sourceforge.net/.
-
- If you want to compile it as a module, say 'M' here and read
- Documentation/modules.txt. If unsure, say 'N'.
-
-config IP_NF_SIP
- tristate "SIP protocol support (EXPERIMENTAL)"
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
- help
- SIP is an application-layer control protocol that can establish,
- modify, and terminate multimedia sessions (conferences) such as
- Internet telephony calls. With the ip_conntrack_sip and
- the ip_nat_sip modules you can support the protocol on a connection
- tracking/NATing firewall.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
config IP_NF_QUEUE
tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
help
@@ -361,17 +179,6 @@ config IP_NF_TARGET_ULOG
To compile it as a module, choose M here. If unsure, say N.
-# NAT + specific targets: ip_conntrack
-config IP_NF_NAT
- tristate "Full NAT"
- depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
- help
- The Full NAT option allows masquerading, port forwarding and other
- forms of full Network Address Port Translation. It is controlled by
- the `nat' table in iptables: see the man page for iptables(8).
-
- To compile it as a module, choose M here. If unsure, say N.
-
# NAT + specific targets: nf_conntrack
config NF_NAT
tristate "Full NAT"
@@ -383,11 +190,6 @@ config NF_NAT
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_NAT_NEEDED
- bool
- depends on IP_NF_NAT
- default y
-
config NF_NAT_NEEDED
bool
depends on NF_NAT
@@ -395,7 +197,7 @@ config NF_NAT_NEEDED
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- depends on (NF_NAT || IP_NF_NAT)
+ depends on NF_NAT
help
Masquerading is a special case of NAT: all outgoing connections are
changed to seem to come from a particular interface's address, and
@@ -407,7 +209,7 @@ config IP_NF_TARGET_MASQUERADE
config IP_NF_TARGET_REDIRECT
tristate "REDIRECT target support"
- depends on (NF_NAT || IP_NF_NAT)
+ depends on NF_NAT
help
REDIRECT is a special case of NAT: all incoming connections are
mapped onto the incoming interface's address, causing the packets to
@@ -418,7 +220,7 @@ config IP_NF_TARGET_REDIRECT
config IP_NF_TARGET_NETMAP
tristate "NETMAP target support"
- depends on (NF_NAT || IP_NF_NAT)
+ depends on NF_NAT
help
NETMAP is an implementation of static 1:1 NAT mapping of network
addresses. It maps the network address part, while keeping the host
@@ -429,28 +231,13 @@ config IP_NF_TARGET_NETMAP
config IP_NF_TARGET_SAME
tristate "SAME target support"
- depends on (NF_NAT || IP_NF_NAT)
+ depends on NF_NAT
help
This option adds a `SAME' target, which works like the standard SNAT
target, but attempts to give clients the same IP for all connections.
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_NAT_SNMP_BASIC
- tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && IP_NF_NAT
- ---help---
-
- This module implements an Application Layer Gateway (ALG) for
- SNMP payloads. In conjunction with NAT, it allows a network
- management system to access multiple private networks with
- conflicting addresses. It works by modifying IP addresses
- inside SNMP payloads to match IP-layer NAT mapping.
-
- This is the "basic" form of SNMP-ALG, as described in RFC 2962
-
- To compile it as a module, choose M here. If unsure, say N.
-
config NF_NAT_SNMP_BASIC
tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
depends on EXPERIMENTAL && NF_NAT
@@ -477,78 +264,37 @@ config NF_NAT_PROTO_GRE
tristate
depends on NF_NAT && NF_CT_PROTO_GRE
-config IP_NF_NAT_FTP
- tristate
- depends on IP_NF_IPTABLES && IP_NF_CONNTRACK && IP_NF_NAT
- default IP_NF_NAT && IP_NF_FTP
-
config NF_NAT_FTP
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_FTP
-config IP_NF_NAT_IRC
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_IRC=y
- default m if IP_NF_IRC=m
-
config NF_NAT_IRC
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_IRC
-config IP_NF_NAT_TFTP
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_TFTP=y
- default m if IP_NF_TFTP=m
-
config NF_NAT_TFTP
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_TFTP
-config IP_NF_NAT_AMANDA
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_AMANDA=y
- default m if IP_NF_AMANDA=m
-
config NF_NAT_AMANDA
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_AMANDA
-config IP_NF_NAT_PPTP
- tristate
- depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
- default IP_NF_NAT if IP_NF_PPTP=y
- default m if IP_NF_PPTP=m
-
config NF_NAT_PPTP
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_PPTP
select NF_NAT_PROTO_GRE
-config IP_NF_NAT_H323
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_H323=y
- default m if IP_NF_H323=m
-
config NF_NAT_H323
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_H323
-config IP_NF_NAT_SIP
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_SIP=y
- default m if IP_NF_SIP=m
-
config NF_NAT_SIP
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
@@ -606,9 +352,8 @@ config IP_NF_TARGET_TTL
config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support (EXPERIMENTAL)"
depends on IP_NF_MANGLE && EXPERIMENTAL
- depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
- select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
- select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4
+ depends on NF_CONNTRACK_IPV4
+ select NF_CONNTRACK_MARK
help
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 6625ec68180c..409d273f6f82 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -2,8 +2,6 @@
# Makefile for the netfilter modules on top of IPv4.
#
-# objects for the standalone - connection tracking / NAT
-ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
# objects for l3 independent conntrack
nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
@@ -12,53 +10,14 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
endif
endif
-ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
-nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
-ifneq ($(CONFIG_NF_NAT),)
+nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
-else
-iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o
-endif
-
-ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
-ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
-
-ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ../../netfilter/nf_conntrack_h323_asn1.o
-ip_nat_h323-objs := ip_nat_helper_h323.o
# connection tracking
-obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
-obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
obj-$(CONFIG_NF_NAT) += nf_nat.o
-# conntrack netlink interface
-obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
-
-
-# SCTP protocol connection tracking
-obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
-
-# connection tracking helpers
-obj-$(CONFIG_IP_NF_H323) += ip_conntrack_h323.o
-obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
-obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
-obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
-obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
-obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
-obj-$(CONFIG_IP_NF_SIP) += ip_conntrack_sip.o
-obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
-
-# NAT helpers (ip_conntrack)
-obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o
-obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
-obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
-obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
-obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
-obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
-obj-$(CONFIG_IP_NF_NAT_SIP) += ip_nat_sip.o
-
# NAT helpers (nf_conntrack)
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
@@ -78,7 +37,6 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
obj-$(CONFIG_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
@@ -100,7 +58,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 5170f5c75f9d..cae41215e3c7 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -166,13 +166,9 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
return 0;
}
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
- unsigned long odev;
- memcpy(&odev, outdev + i*sizeof(unsigned long),
- sizeof(unsigned long));
- ret |= (odev
- ^ ((const unsigned long *)arpinfo->outiface)[i])
- & ((const unsigned long *)arpinfo->outiface_mask)[i];
+ for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
+ ret |= (outdev[i] ^ arpinfo->outiface[i])
+ & arpinfo->outiface_mask[i];
}
if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
@@ -249,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
- arp = (*pskb)->nh.arph;
+ arp = arp_hdr(*pskb);
do {
if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
struct arpt_entry_target *t;
@@ -301,7 +297,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
t->data);
/* Target might have changed stuff. */
- arp = (*pskb)->nh.arph;
+ arp = arp_hdr(*pskb);
if (verdict == ARPT_CONTINUE)
e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 709db4d3f48f..6298d404e7c7 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -30,35 +30,35 @@ target(struct sk_buff **pskb,
*pskb = nskb;
}
- arp = (*pskb)->nh.arph;
- arpptr = (*pskb)->nh.raw + sizeof(*arp);
+ arp = arp_hdr(*pskb);
+ arpptr = skb_network_header(*pskb) + sizeof(*arp);
pln = arp->ar_pln;
hln = arp->ar_hln;
/* We assume that pln and hln were checked in the match */
if (mangle->flags & ARPT_MANGLE_SDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > (**pskb).tail))
+ (arpptr + hln > skb_tail_pointer(*pskb)))
return NF_DROP;
memcpy(arpptr, mangle->src_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_SIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > (**pskb).tail))
+ (arpptr + pln > skb_tail_pointer(*pskb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_s.src_ip, pln);
}
arpptr += pln;
if (mangle->flags & ARPT_MANGLE_TDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > (**pskb).tail))
+ (arpptr + hln > skb_tail_pointer(*pskb)))
return NF_DROP;
memcpy(arpptr, mangle->tgt_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_TIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > (**pskb).tail))
+ (arpptr + pln > skb_tail_pointer(*pskb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
}
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
deleted file mode 100644
index 4f561f52c83a..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/* Amanda extension for IP connection tracking, Version 0.2
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on HW's ip_conntrack_irc.c as well as other modules
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Module load syntax:
- * insmod ip_conntrack_amanda.o [master_timeout=n]
- *
- * Where master_timeout is the timeout (in seconds) of the master
- * connection (port 10080). This defaults to 5 minutes but if
- * your clients take longer than 5 minutes to do their work
- * before getting back to the Amanda server, you can increase
- * this value.
- *
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/textsearch.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-static unsigned int master_timeout = 300;
-static char *ts_algo = "kmp";
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda connection tracking module");
-MODULE_LICENSE("GPL");
-module_param(master_timeout, uint, 0600);
-MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
-module_param(ts_algo, charp, 0400);
-MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
-
-unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_amanda_hook);
-
-enum amanda_strings {
- SEARCH_CONNECT,
- SEARCH_NEWLINE,
- SEARCH_DATA,
- SEARCH_MESG,
- SEARCH_INDEX,
-};
-
-static struct {
- char *string;
- size_t len;
- struct ts_config *ts;
-} search[] = {
- [SEARCH_CONNECT] = {
- .string = "CONNECT ",
- .len = 8,
- },
- [SEARCH_NEWLINE] = {
- .string = "\n",
- .len = 1,
- },
- [SEARCH_DATA] = {
- .string = "DATA ",
- .len = 5,
- },
- [SEARCH_MESG] = {
- .string = "MESG ",
- .len = 5,
- },
- [SEARCH_INDEX] = {
- .string = "INDEX ",
- .len = 6,
- },
-};
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- struct ts_state ts;
- struct ip_conntrack_expect *exp;
- unsigned int dataoff, start, stop, off, i;
- char pbuf[sizeof("65535")], *tmp;
- u_int16_t port, len;
- int ret = NF_ACCEPT;
- typeof(ip_nat_amanda_hook) ip_nat_amanda;
-
- /* Only look at packets from the Amanda server */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- /* increase the UDP timeout of the master connection as replies from
- * Amanda clients to the server can be quite delayed */
- ip_ct_refresh(ct, *pskb, master_timeout * HZ);
-
- /* No data? */
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- if (dataoff >= (*pskb)->len) {
- if (net_ratelimit())
- printk("amanda_help: skblen = %u\n", (*pskb)->len);
- return NF_ACCEPT;
- }
-
- memset(&ts, 0, sizeof(ts));
- start = skb_find_text(*pskb, dataoff, (*pskb)->len,
- search[SEARCH_CONNECT].ts, &ts);
- if (start == UINT_MAX)
- goto out;
- start += dataoff + search[SEARCH_CONNECT].len;
-
- memset(&ts, 0, sizeof(ts));
- stop = skb_find_text(*pskb, start, (*pskb)->len,
- search[SEARCH_NEWLINE].ts, &ts);
- if (stop == UINT_MAX)
- goto out;
- stop += start;
-
- for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
- memset(&ts, 0, sizeof(ts));
- off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
- if (off == UINT_MAX)
- continue;
- off += start + search[i].len;
-
- len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
- if (skb_copy_bits(*pskb, off, pbuf, len))
- break;
- pbuf[len] = '\0';
-
- port = simple_strtoul(pbuf, &tmp, 10);
- len = tmp - pbuf;
- if (port == 0 || len > 5)
- break;
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL) {
- ret = NF_DROP;
- goto out;
- }
-
- exp->expectfn = NULL;
- exp->flags = 0;
-
- exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->tuple.dst.u.tcp.port = htons(port);
-
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
-
- /* RCU read locked by nf_hook_slow */
- ip_nat_amanda = rcu_dereference(ip_nat_amanda_hook);
- if (ip_nat_amanda)
- ret = ip_nat_amanda(pskb, ctinfo, off - dataoff,
- len, exp);
- else if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- ip_conntrack_expect_put(exp);
- }
-
-out:
- return ret;
-}
-
-static struct ip_conntrack_helper amanda_helper = {
- .max_expected = 3,
- .timeout = 180,
- .me = THIS_MODULE,
- .help = help,
- .name = "amanda",
-
- .tuple = { .src = { .u = { .udp = {.port = __constant_htons(10080) } } },
- .dst = { .protonum = IPPROTO_UDP },
- },
- .mask = { .src = { .u = { 0xFFFF } },
- .dst = { .protonum = 0xFF },
- },
-};
-
-static void __exit ip_conntrack_amanda_fini(void)
-{
- int i;
-
- ip_conntrack_helper_unregister(&amanda_helper);
- for (i = 0; i < ARRAY_SIZE(search); i++)
- textsearch_destroy(search[i].ts);
-}
-
-static int __init ip_conntrack_amanda_init(void)
-{
- int ret, i;
-
- ret = -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(search); i++) {
- search[i].ts = textsearch_prepare(ts_algo, search[i].string,
- search[i].len,
- GFP_KERNEL, TS_AUTOLOAD);
- if (search[i].ts == NULL)
- goto err;
- }
- ret = ip_conntrack_helper_register(&amanda_helper);
- if (ret < 0)
- goto err;
- return 0;
-
-err:
- for (; i >= 0; i--) {
- if (search[i].ts)
- textsearch_destroy(search[i].ts);
- }
- return ret;
-}
-
-module_init(ip_conntrack_amanda_init);
-module_exit(ip_conntrack_amanda_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
deleted file mode 100644
index 23b99ae2cc37..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ /dev/null
@@ -1,1550 +0,0 @@
-/* Connection state tracking for netfilter. This is separated from,
- but required by, the NAT layer; it can also be used by an iptables
- extension. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * - new API and handling of conntrack/nat helpers
- * - now capable of multiple expectations for one master
- * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
- * - add usage/reference counts to ip_conntrack_expect
- * - export ip_conntrack[_expect]_{find_get,put} functions
- * */
-
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <linux/stddef.h>
-#include <linux/sysctl.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/jhash.h>
-#include <linux/err.h>
-#include <linux/percpu.h>
-#include <linux/moduleparam.h>
-#include <linux/notifier.h>
-
-/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
- registrations, conntrack timers*/
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#define IP_CONNTRACK_VERSION "2.4"
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_conntrack_lock);
-
-/* ip_conntrack_standalone needs this */
-atomic_t ip_conntrack_count = ATOMIC_INIT(0);
-
-void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
-LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
-static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size __read_mostly = 0;
-int ip_conntrack_max __read_mostly;
-struct list_head *ip_conntrack_hash __read_mostly;
-static struct kmem_cache *ip_conntrack_cachep __read_mostly;
-static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly;
-struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid __read_mostly;
-static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc __read_mostly;
-
-static unsigned int ip_conntrack_next_id;
-static unsigned int ip_conntrack_expect_next_id;
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
-ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);
-
-DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
-
-/* deliver cached events and clear cache entry - must be called with locally
- * disabled softirqs */
-static inline void
-__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
-{
- DEBUGP("ecache: delivering events for %p\n", ecache->ct);
- if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
- atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
- ecache->ct);
- ecache->events = 0;
- ip_conntrack_put(ecache->ct);
- ecache->ct = NULL;
-}
-
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling or freeing the skb */
-void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
-{
- struct ip_conntrack_ecache *ecache;
-
- local_bh_disable();
- ecache = &__get_cpu_var(ip_conntrack_ecache);
- if (ecache->ct == ct)
- __ip_ct_deliver_cached_events(ecache);
- local_bh_enable();
-}
-
-void __ip_ct_event_cache_init(struct ip_conntrack *ct)
-{
- struct ip_conntrack_ecache *ecache;
-
- /* take care of delivering potentially old events */
- ecache = &__get_cpu_var(ip_conntrack_ecache);
- BUG_ON(ecache->ct == ct);
- if (ecache->ct)
- __ip_ct_deliver_cached_events(ecache);
- /* initialize for this conntrack/packet */
- ecache->ct = ct;
- nf_conntrack_get(&ct->ct_general);
-}
-
-/* flush the event cache - touches other CPU's data and must not be called while
- * packets are still passing through the code */
-static void ip_ct_event_cache_flush(void)
-{
- struct ip_conntrack_ecache *ecache;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- ecache = &per_cpu(ip_conntrack_ecache, cpu);
- if (ecache->ct)
- ip_conntrack_put(ecache->ct);
- }
-}
-#else
-static inline void ip_ct_event_cache_flush(void) {}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int ip_conntrack_hash_rnd_initted;
-static unsigned int ip_conntrack_hash_rnd;
-
-static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
- unsigned int size, unsigned int rnd)
-{
- return (jhash_3words((__force u32)tuple->src.ip,
- ((__force u32)tuple->dst.ip ^ tuple->dst.protonum),
- (tuple->src.u.all | (tuple->dst.u.all << 16)),
- rnd) % size);
-}
-
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
-{
- return __hash_conntrack(tuple, ip_conntrack_htable_size,
- ip_conntrack_hash_rnd);
-}
-
-int
-ip_ct_get_tuple(const struct iphdr *iph,
- const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_protocol *protocol)
-{
- /* Never happen */
- if (iph->frag_off & htons(IP_OFFSET)) {
- printk("ip_conntrack_core: Frag of proto %u.\n",
- iph->protocol);
- return 0;
- }
-
- tuple->src.ip = iph->saddr;
- tuple->dst.ip = iph->daddr;
- tuple->dst.protonum = iph->protocol;
- tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
- return protocol->pkt_to_tuple(skb, dataoff, tuple);
-}
-
-int
-ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
- const struct ip_conntrack_tuple *orig,
- const struct ip_conntrack_protocol *protocol)
-{
- inverse->src.ip = orig->dst.ip;
- inverse->dst.ip = orig->src.ip;
- inverse->dst.protonum = orig->dst.protonum;
- inverse->dst.dir = !orig->dst.dir;
-
- return protocol->invert_tuple(inverse, orig);
-}
-
-
-/* ip_conntrack_expect helper functions */
-void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
-{
- IP_NF_ASSERT(!timer_pending(&exp->timeout));
- list_del(&exp->list);
- CONNTRACK_STAT_INC(expect_delete);
- exp->master->expecting--;
- ip_conntrack_expect_put(exp);
-}
-
-static void expectation_timed_out(unsigned long ul_expect)
-{
- struct ip_conntrack_expect *exp = (void *)ul_expect;
-
- write_lock_bh(&ip_conntrack_lock);
- ip_ct_unlink_expect(exp);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_expect_put(exp);
-}
-
-struct ip_conntrack_expect *
-__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *i;
-
- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
- return i;
- }
- return NULL;
-}
-
-/* Just find a expectation corresponding to a tuple. */
-struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *i;
-
- read_lock_bh(&ip_conntrack_lock);
- i = __ip_conntrack_expect_find(tuple);
- if (i)
- atomic_inc(&i->use);
- read_unlock_bh(&ip_conntrack_lock);
-
- return i;
-}
-
-/* If an expectation for this connection is found, it gets delete from
- * global list then returned. */
-static struct ip_conntrack_expect *
-find_expectation(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *i;
-
- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- /* If master is not in hash table yet (ie. packet hasn't left
- this machine yet), how can other end know about expected?
- Hence these are not the droids you are looking for (if
- master ct never got confirmed, we'd hold a reference to it
- and weird things would happen to future packets). */
- if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
- && is_confirmed(i->master)) {
- if (i->flags & IP_CT_EXPECT_PERMANENT) {
- atomic_inc(&i->use);
- return i;
- } else if (del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- return i;
- }
- }
- }
- return NULL;
-}
-
-/* delete all expectations for this conntrack */
-void ip_ct_remove_expectations(struct ip_conntrack *ct)
-{
- struct ip_conntrack_expect *i, *tmp;
-
- /* Optimization: most connection never expect any others. */
- if (ct->expecting == 0)
- return;
-
- list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
- if (i->master == ct && del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- ip_conntrack_expect_put(i);
- }
- }
-}
-
-static void
-clean_from_lists(struct ip_conntrack *ct)
-{
- DEBUGP("clean_from_lists(%p)\n", ct);
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
- list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
-
- /* Destroy all pending expectations */
- ip_ct_remove_expectations(ct);
-}
-
-static void
-destroy_conntrack(struct nf_conntrack *nfct)
-{
- struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
- struct ip_conntrack_protocol *proto;
- struct ip_conntrack_helper *helper;
- typeof(ip_conntrack_destroyed) destroyed;
-
- DEBUGP("destroy_conntrack(%p)\n", ct);
- IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
- IP_NF_ASSERT(!timer_pending(&ct->timeout));
-
- ip_conntrack_event(IPCT_DESTROY, ct);
- set_bit(IPS_DYING_BIT, &ct->status);
-
- helper = ct->helper;
- if (helper && helper->destroy)
- helper->destroy(ct);
-
- /* To make sure we don't get any weird locking issues here:
- * destroy_conntrack() MUST NOT be called with a write lock
- * to ip_conntrack_lock!!! -HW */
- rcu_read_lock();
- proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
- if (proto && proto->destroy)
- proto->destroy(ct);
-
- destroyed = rcu_dereference(ip_conntrack_destroyed);
- if (destroyed)
- destroyed(ct);
-
- rcu_read_unlock();
-
- write_lock_bh(&ip_conntrack_lock);
- /* Expectations will have been removed in clean_from_lists,
- * except TFTP can create an expectation on the first packet,
- * before connection is in the list, so we need to clean here,
- * too. */
- ip_ct_remove_expectations(ct);
-
- /* We overload first tuple to link into unconfirmed list. */
- if (!is_confirmed(ct)) {
- BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
- }
-
- CONNTRACK_STAT_INC(delete);
- write_unlock_bh(&ip_conntrack_lock);
-
- if (ct->master)
- ip_conntrack_put(ct->master);
-
- DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
- ip_conntrack_free(ct);
-}
-
-static void death_by_timeout(unsigned long ul_conntrack)
-{
- struct ip_conntrack *ct = (void *)ul_conntrack;
-
- write_lock_bh(&ip_conntrack_lock);
- /* Inside lock so preempt is disabled on module removal path.
- * Otherwise we can get spurious warnings. */
- CONNTRACK_STAT_INC(delete_list);
- clean_from_lists(ct);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_put(ct);
-}
-
-struct ip_conntrack_tuple_hash *
-__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- struct ip_conntrack_tuple_hash *h;
- unsigned int hash = hash_conntrack(tuple);
-
- list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
- if (tuplehash_to_ctrack(h) != ignored_conntrack &&
- ip_ct_tuple_equal(tuple, &h->tuple)) {
- CONNTRACK_STAT_INC(found);
- return h;
- }
- CONNTRACK_STAT_INC(searched);
- }
-
- return NULL;
-}
-
-/* Find a connection corresponding to a tuple. */
-struct ip_conntrack_tuple_hash *
-ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- struct ip_conntrack_tuple_hash *h;
-
- read_lock_bh(&ip_conntrack_lock);
- h = __ip_conntrack_find(tuple, ignored_conntrack);
- if (h)
- atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
- read_unlock_bh(&ip_conntrack_lock);
-
- return h;
-}
-
-static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
- unsigned int hash,
- unsigned int repl_hash)
-{
- ct->id = ++ip_conntrack_next_id;
- list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
- &ip_conntrack_hash[hash]);
- list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
- &ip_conntrack_hash[repl_hash]);
-}
-
-void ip_conntrack_hash_insert(struct ip_conntrack *ct)
-{
- unsigned int hash, repl_hash;
-
- hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- write_lock_bh(&ip_conntrack_lock);
- __ip_conntrack_hash_insert(ct, hash, repl_hash);
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* Confirm a connection given skb; places it in hash table */
-int
-__ip_conntrack_confirm(struct sk_buff **pskb)
-{
- unsigned int hash, repl_hash;
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- /* ipt_REJECT uses ip_conntrack_attach to attach related
- ICMP/TCP RST packets in other direction. Actual packet
- which created connection will be IP_CT_NEW or for an
- expected connection, IP_CT_RELATED. */
- if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- /* We're not in hash table, and we refuse to set up related
- connections for unconfirmed conns. But packet copies and
- REJECT will give spurious warnings here. */
- /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
-
- /* No external references means noone else could have
- confirmed us. */
- IP_NF_ASSERT(!is_confirmed(ct));
- DEBUGP("Confirming conntrack %p\n", ct);
-
- write_lock_bh(&ip_conntrack_lock);
-
- /* See if there's one in the list already, including reverse:
- NAT could have grabbed it without realizing, since we're
- not in the hash. If there is, we lost race. */
- list_for_each_entry(h, &ip_conntrack_hash[hash], list)
- if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &h->tuple))
- goto out;
- list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
- if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- &h->tuple))
- goto out;
-
- /* Remove from unconfirmed list */
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-
- __ip_conntrack_hash_insert(ct, hash, repl_hash);
- /* Timer relative to confirmation time, not original
- setting time, otherwise we'd get timer wrap in
- weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
- atomic_inc(&ct->ct_general.use);
- set_bit(IPS_CONFIRMED_BIT, &ct->status);
- CONNTRACK_STAT_INC(insert);
- write_unlock_bh(&ip_conntrack_lock);
- if (ct->helper)
- ip_conntrack_event_cache(IPCT_HELPER, *pskb);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
- if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
- test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
-#endif
- ip_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, *pskb);
-
- return NF_ACCEPT;
-
-out:
- CONNTRACK_STAT_INC(insert_failed);
- write_unlock_bh(&ip_conntrack_lock);
- return NF_DROP;
-}
-
-/* Returns true if a connection correspondings to the tuple (required
- for NAT). */
-int
-ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- struct ip_conntrack_tuple_hash *h;
-
- read_lock_bh(&ip_conntrack_lock);
- h = __ip_conntrack_find(tuple, ignored_conntrack);
- read_unlock_bh(&ip_conntrack_lock);
-
- return h != NULL;
-}
-
-/* There's a small race here where we may free a just-assured
- connection. Too bad: we're in trouble anyway. */
-static int early_drop(struct list_head *chain)
-{
- /* Traverse backwards: gives us oldest, which is roughly LRU */
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct = NULL, *tmp;
- int dropped = 0;
-
- read_lock_bh(&ip_conntrack_lock);
- list_for_each_entry_reverse(h, chain, list) {
- tmp = tuplehash_to_ctrack(h);
- if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
- ct = tmp;
- atomic_inc(&ct->ct_general.use);
- break;
- }
- }
- read_unlock_bh(&ip_conntrack_lock);
-
- if (!ct)
- return dropped;
-
- if (del_timer(&ct->timeout)) {
- death_by_timeout((unsigned long)ct);
- dropped = 1;
- CONNTRACK_STAT_INC_ATOMIC(early_drop);
- }
- ip_conntrack_put(ct);
- return dropped;
-}
-
-static struct ip_conntrack_helper *
-__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_helper *h;
-
- list_for_each_entry(h, &helpers, list) {
- if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
- return h;
- }
- return NULL;
-}
-
-struct ip_conntrack_helper *
-ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_helper *helper;
-
- /* need ip_conntrack_lock to assure that helper exists until
- * try_module_get() is called */
- read_lock_bh(&ip_conntrack_lock);
-
- helper = __ip_conntrack_helper_find(tuple);
- if (helper) {
- /* need to increase module usage count to assure helper will
- * not go away while the caller is e.g. busy putting a
- * conntrack in the hash that uses the helper */
- if (!try_module_get(helper->me))
- helper = NULL;
- }
-
- read_unlock_bh(&ip_conntrack_lock);
-
- return helper;
-}
-
-void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
-{
- module_put(helper->me);
-}
-
-struct ip_conntrack_protocol *
-__ip_conntrack_proto_find(u_int8_t protocol)
-{
- return ip_ct_protos[protocol];
-}
-
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-struct ip_conntrack_protocol *
-ip_conntrack_proto_find_get(u_int8_t protocol)
-{
- struct ip_conntrack_protocol *p;
-
- rcu_read_lock();
- p = __ip_conntrack_proto_find(protocol);
- if (p) {
- if (!try_module_get(p->me))
- p = &ip_conntrack_generic_protocol;
- }
- rcu_read_unlock();
-
- return p;
-}
-
-void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
-{
- module_put(p->me);
-}
-
-struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
- struct ip_conntrack_tuple *repl)
-{
- struct ip_conntrack *conntrack;
-
- if (!ip_conntrack_hash_rnd_initted) {
- get_random_bytes(&ip_conntrack_hash_rnd, 4);
- ip_conntrack_hash_rnd_initted = 1;
- }
-
- /* We don't want any race condition at early drop stage */
- atomic_inc(&ip_conntrack_count);
-
- if (ip_conntrack_max
- && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
- unsigned int hash = hash_conntrack(orig);
- /* Try dropping from this hash chain. */
- if (!early_drop(&ip_conntrack_hash[hash])) {
- atomic_dec(&ip_conntrack_count);
- if (net_ratelimit())
- printk(KERN_WARNING
- "ip_conntrack: table full, dropping"
- " packet.\n");
- return ERR_PTR(-ENOMEM);
- }
- }
-
- conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
- if (!conntrack) {
- DEBUGP("Can't allocate conntrack.\n");
- atomic_dec(&ip_conntrack_count);
- return ERR_PTR(-ENOMEM);
- }
-
- atomic_set(&conntrack->ct_general.use, 1);
- conntrack->ct_general.destroy = destroy_conntrack;
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
- conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
- /* Don't set timer yet: wait for confirmation */
- init_timer(&conntrack->timeout);
- conntrack->timeout.data = (unsigned long)conntrack;
- conntrack->timeout.function = death_by_timeout;
-
- return conntrack;
-}
-
-void
-ip_conntrack_free(struct ip_conntrack *conntrack)
-{
- atomic_dec(&ip_conntrack_count);
- kmem_cache_free(ip_conntrack_cachep, conntrack);
-}
-
-/* Allocate a new conntrack: we return -ENOMEM if classification
- * failed due to stress. Otherwise it really is unclassifiable */
-static struct ip_conntrack_tuple_hash *
-init_conntrack(struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_protocol *protocol,
- struct sk_buff *skb)
-{
- struct ip_conntrack *conntrack;
- struct ip_conntrack_tuple repl_tuple;
- struct ip_conntrack_expect *exp;
-
- if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
- DEBUGP("Can't invert tuple.\n");
- return NULL;
- }
-
- conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
- if (conntrack == NULL || IS_ERR(conntrack))
- return (struct ip_conntrack_tuple_hash *)conntrack;
-
- if (!protocol->new(conntrack, skb)) {
- ip_conntrack_free(conntrack);
- return NULL;
- }
-
- write_lock_bh(&ip_conntrack_lock);
- exp = find_expectation(tuple);
-
- if (exp) {
- DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
- conntrack, exp);
- /* Welcome, Mr. Bond. We've been expecting you... */
- __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
- conntrack->master = exp->master;
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
- conntrack->mark = exp->master->mark;
-#endif
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
- defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
- /* this is ugly, but there is no other place where to put it */
- conntrack->nat.masq_index = exp->master->nat.masq_index;
-#endif
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
- conntrack->secmark = exp->master->secmark;
-#endif
- nf_conntrack_get(&conntrack->master->ct_general);
- CONNTRACK_STAT_INC(expect_new);
- } else {
- conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
-
- CONNTRACK_STAT_INC(new);
- }
-
- /* Overload tuple linked list to put us in unconfirmed list. */
- list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
-
- write_unlock_bh(&ip_conntrack_lock);
-
- if (exp) {
- if (exp->expectfn)
- exp->expectfn(conntrack, exp);
- ip_conntrack_expect_put(exp);
- }
-
- return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
-}
-
-/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
-static inline struct ip_conntrack *
-resolve_normal_ct(struct sk_buff *skb,
- struct ip_conntrack_protocol *proto,
- int *set_reply,
- unsigned int hooknum,
- enum ip_conntrack_info *ctinfo)
-{
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct;
-
- IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
-
- if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
- &tuple,proto))
- return NULL;
-
- /* look for tuple match */
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h) {
- h = init_conntrack(&tuple, proto, skb);
- if (!h)
- return NULL;
- if (IS_ERR(h))
- return (void *)h;
- }
- ct = tuplehash_to_ctrack(h);
-
- /* It exists; we have (non-exclusive) reference. */
- if (DIRECTION(h) == IP_CT_DIR_REPLY) {
- *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
- /* Please set reply bit if this packet OK */
- *set_reply = 1;
- } else {
- /* Once we've had two way comms, always ESTABLISHED. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
- DEBUGP("ip_conntrack_in: normal packet for %p\n",
- ct);
- *ctinfo = IP_CT_ESTABLISHED;
- } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
- DEBUGP("ip_conntrack_in: related packet for %p\n",
- ct);
- *ctinfo = IP_CT_RELATED;
- } else {
- DEBUGP("ip_conntrack_in: new packet for %p\n",
- ct);
- *ctinfo = IP_CT_NEW;
- }
- *set_reply = 0;
- }
- skb->nfct = &ct->ct_general;
- skb->nfctinfo = *ctinfo;
- return ct;
-}
-
-/* Netfilter hook itself. */
-unsigned int ip_conntrack_in(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- struct ip_conntrack_protocol *proto;
- int set_reply = 0;
- int ret;
-
- /* Previously seen (loopback or untracked)? Ignore. */
- if ((*pskb)->nfct) {
- CONNTRACK_STAT_INC_ATOMIC(ignore);
- return NF_ACCEPT;
- }
-
- /* Never happen */
- if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
- if (net_ratelimit()) {
- printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
- (*pskb)->nh.iph->protocol, hooknum);
- }
- return NF_DROP;
- }
-
-/* Doesn't cover locally-generated broadcast, so not worth it. */
-#if 0
- /* Ignore broadcast: no `connection'. */
- if ((*pskb)->pkt_type == PACKET_BROADCAST) {
- printk("Broadcast packet!\n");
- return NF_ACCEPT;
- } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
- == htonl(0x000000FF)) {
- printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr),
- (*pskb)->sk, (*pskb)->pkt_type);
- }
-#endif
-
- /* rcu_read_lock()ed by nf_hook_slow */
- proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
-
- /* It may be an special packet, error, unclean...
- * inverse of the return code tells to the netfilter
- * core what to do with the packet. */
- if (proto->error != NULL
- && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
- CONNTRACK_STAT_INC_ATOMIC(error);
- CONNTRACK_STAT_INC_ATOMIC(invalid);
- return -ret;
- }
-
- if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
- /* Not valid part of a connection */
- CONNTRACK_STAT_INC_ATOMIC(invalid);
- return NF_ACCEPT;
- }
-
- if (IS_ERR(ct)) {
- /* Too stressed to deal. */
- CONNTRACK_STAT_INC_ATOMIC(drop);
- return NF_DROP;
- }
-
- IP_NF_ASSERT((*pskb)->nfct);
-
- ret = proto->packet(ct, *pskb, ctinfo);
- if (ret < 0) {
- /* Invalid: inverse of the return code tells
- * the netfilter core what to do*/
- nf_conntrack_put((*pskb)->nfct);
- (*pskb)->nfct = NULL;
- CONNTRACK_STAT_INC_ATOMIC(invalid);
- return -ret;
- }
-
- if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
- ip_conntrack_event_cache(IPCT_STATUS, *pskb);
-
- return ret;
-}
-
-int invert_tuplepr(struct ip_conntrack_tuple *inverse,
- const struct ip_conntrack_tuple *orig)
-{
- struct ip_conntrack_protocol *proto;
- int ret;
-
- rcu_read_lock();
- proto = __ip_conntrack_proto_find(orig->dst.protonum);
- ret = ip_ct_invert_tuple(inverse, orig, proto);
- rcu_read_unlock();
-
- return ret;
-}
-
-/* Would two expected things clash? */
-static inline int expect_clash(const struct ip_conntrack_expect *a,
- const struct ip_conntrack_expect *b)
-{
- /* Part covered by intersection of masks must be unequal,
- otherwise they clash */
- struct ip_conntrack_tuple intersect_mask
- = { { a->mask.src.ip & b->mask.src.ip,
- { a->mask.src.u.all & b->mask.src.u.all } },
- { a->mask.dst.ip & b->mask.dst.ip,
- { a->mask.dst.u.all & b->mask.dst.u.all },
- a->mask.dst.protonum & b->mask.dst.protonum } };
-
- return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
-}
-
-static inline int expect_matches(const struct ip_conntrack_expect *a,
- const struct ip_conntrack_expect *b)
-{
- return a->master == b->master
- && ip_ct_tuple_equal(&a->tuple, &b->tuple)
- && ip_ct_tuple_equal(&a->mask, &b->mask);
-}
-
-/* Generally a bad idea to call this: could have matched already. */
-void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack_expect *i;
-
- write_lock_bh(&ip_conntrack_lock);
- /* choose the the oldest expectation to evict */
- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
- if (expect_matches(i, exp) && del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_expect_put(i);
- return;
- }
- }
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* We don't increase the master conntrack refcount for non-fulfilled
- * conntracks. During the conntrack destruction, the expectations are
- * always killed before the conntrack itself */
-struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
-{
- struct ip_conntrack_expect *new;
-
- new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
- if (!new) {
- DEBUGP("expect_related: OOM allocating expect\n");
- return NULL;
- }
- new->master = me;
- atomic_set(&new->use, 1);
- return new;
-}
-
-void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
-{
- if (atomic_dec_and_test(&exp->use))
- kmem_cache_free(ip_conntrack_expect_cachep, exp);
-}
-
-static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
-{
- atomic_inc(&exp->use);
- exp->master->expecting++;
- list_add(&exp->list, &ip_conntrack_expect_list);
-
- init_timer(&exp->timeout);
- exp->timeout.data = (unsigned long)exp;
- exp->timeout.function = expectation_timed_out;
- exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
- add_timer(&exp->timeout);
-
- exp->id = ++ip_conntrack_expect_next_id;
- atomic_inc(&exp->use);
- CONNTRACK_STAT_INC(expect_create);
-}
-
-/* Race with expectations being used means we could have none to find; OK. */
-static void evict_oldest_expect(struct ip_conntrack *master)
-{
- struct ip_conntrack_expect *i;
-
- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
- if (i->master == master) {
- if (del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- ip_conntrack_expect_put(i);
- }
- break;
- }
- }
-}
-
-static inline int refresh_timer(struct ip_conntrack_expect *i)
-{
- if (!del_timer(&i->timeout))
- return 0;
-
- i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
- add_timer(&i->timeout);
- return 1;
-}
-
-int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
-{
- struct ip_conntrack_expect *i;
- int ret;
-
- DEBUGP("ip_conntrack_expect_related %p\n", related_to);
- DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
- DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
-
- write_lock_bh(&ip_conntrack_lock);
- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- if (expect_matches(i, expect)) {
- /* Refresh timer: if it's dying, ignore.. */
- if (refresh_timer(i)) {
- ret = 0;
- goto out;
- }
- } else if (expect_clash(i, expect)) {
- ret = -EBUSY;
- goto out;
- }
- }
-
- /* Will be over limit? */
- if (expect->master->helper->max_expected &&
- expect->master->expecting >= expect->master->helper->max_expected)
- evict_oldest_expect(expect->master);
-
- ip_conntrack_expect_insert(expect);
- ip_conntrack_expect_event(IPEXP_NEW, expect);
- ret = 0;
-out:
- write_unlock_bh(&ip_conntrack_lock);
- return ret;
-}
-
-/* Alter reply tuple (maybe alter helper). This is for NAT, and is
- implicitly racy: see __ip_conntrack_confirm */
-void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
- const struct ip_conntrack_tuple *newreply)
-{
- write_lock_bh(&ip_conntrack_lock);
- /* Should be unconfirmed, so not in hash table yet */
- IP_NF_ASSERT(!is_confirmed(conntrack));
-
- DEBUGP("Altering reply tuple of %p to ", conntrack);
- DUMP_TUPLE(newreply);
-
- conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
- if (!conntrack->master && conntrack->expecting == 0)
- conntrack->helper = __ip_conntrack_helper_find(newreply);
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
-{
- BUG_ON(me->timeout == 0);
- write_lock_bh(&ip_conntrack_lock);
- list_add(&me->list, &helpers);
- write_unlock_bh(&ip_conntrack_lock);
-
- return 0;
-}
-
-struct ip_conntrack_helper *
-__ip_conntrack_helper_find_byname(const char *name)
-{
- struct ip_conntrack_helper *h;
-
- list_for_each_entry(h, &helpers, list) {
- if (!strcmp(h->name, name))
- return h;
- }
-
- return NULL;
-}
-
-static inline void unhelp(struct ip_conntrack_tuple_hash *i,
- const struct ip_conntrack_helper *me)
-{
- if (tuplehash_to_ctrack(i)->helper == me) {
- ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
- tuplehash_to_ctrack(i)->helper = NULL;
- }
-}
-
-void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
-{
- unsigned int i;
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_expect *exp, *tmp;
-
- /* Need write lock here, to delete helper. */
- write_lock_bh(&ip_conntrack_lock);
- list_del(&me->list);
-
- /* Get rid of expectations */
- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
- if (exp->master->helper == me && del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- /* Get rid of expecteds, set helpers to NULL. */
- list_for_each_entry(h, &unconfirmed, list)
- unhelp(h, me);
- for (i = 0; i < ip_conntrack_htable_size; i++) {
- list_for_each_entry(h, &ip_conntrack_hash[i], list)
- unhelp(h, me);
- }
- write_unlock_bh(&ip_conntrack_lock);
-
- /* Someone could be still looking at the helper in a bh. */
- synchronize_net();
-}
-
-/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
-void __ip_ct_refresh_acct(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- unsigned long extra_jiffies,
- int do_acct)
-{
- int event = 0;
-
- IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
- IP_NF_ASSERT(skb);
-
- write_lock_bh(&ip_conntrack_lock);
-
- /* Only update if this is not a fixed timeout */
- if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
- write_unlock_bh(&ip_conntrack_lock);
- return;
- }
-
- /* If not in hash table, timer will not be active yet */
- if (!is_confirmed(ct)) {
- ct->timeout.expires = extra_jiffies;
- event = IPCT_REFRESH;
- } else {
- /* Need del_timer for race avoidance (may already be dying). */
- if (del_timer(&ct->timeout)) {
- ct->timeout.expires = jiffies + extra_jiffies;
- add_timer(&ct->timeout);
- event = IPCT_REFRESH;
- }
- }
-
-#ifdef CONFIG_IP_NF_CT_ACCT
- if (do_acct) {
- ct->counters[CTINFO2DIR(ctinfo)].packets++;
- ct->counters[CTINFO2DIR(ctinfo)].bytes +=
- ntohs(skb->nh.iph->tot_len);
- if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
- || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
- event |= IPCT_COUNTER_FILLING;
- }
-#endif
-
- write_unlock_bh(&ip_conntrack_lock);
-
- /* must be unlocked when calling event cache */
- if (event)
- ip_conntrack_event_cache(event, skb);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
-int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple)
-{
- NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
- &tuple->src.u.tcp.port);
- NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
- &tuple->dst.u.tcp.port);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
- struct ip_conntrack_tuple *t)
-{
- if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
- return -EINVAL;
-
- t->src.u.tcp.port =
- *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
- t->dst.u.tcp.port =
- *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
-
- return 0;
-}
-#endif
-
-/* Returns new sk_buff, or NULL */
-struct sk_buff *
-ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
-{
- skb_orphan(skb);
-
- local_bh_disable();
- skb = ip_defrag(skb, user);
- local_bh_enable();
-
- if (skb)
- ip_send_check(skb->nh.iph);
- return skb;
-}
-
-/* Used by ipt_REJECT. */
-static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- /* This ICMP is in reverse direction to the packet which caused it */
- ct = ip_conntrack_get(skb, &ctinfo);
-
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
- ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
- else
- ctinfo = IP_CT_RELATED;
-
- /* Attach to new skbuff, and increment count */
- nskb->nfct = &ct->ct_general;
- nskb->nfctinfo = ctinfo;
- nf_conntrack_get(nskb->nfct);
-}
-
-/* Bring out ya dead! */
-static struct ip_conntrack *
-get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
- void *data, unsigned int *bucket)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct;
-
- write_lock_bh(&ip_conntrack_lock);
- for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
- list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
- ct = tuplehash_to_ctrack(h);
- if (iter(ct, data))
- goto found;
- }
- }
- list_for_each_entry(h, &unconfirmed, list) {
- ct = tuplehash_to_ctrack(h);
- if (iter(ct, data))
- set_bit(IPS_DYING_BIT, &ct->status);
- }
- write_unlock_bh(&ip_conntrack_lock);
- return NULL;
-
-found:
- atomic_inc(&ct->ct_general.use);
- write_unlock_bh(&ip_conntrack_lock);
- return ct;
-}
-
-void
-ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
-{
- struct ip_conntrack *ct;
- unsigned int bucket = 0;
-
- while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
- /* Time to push up daises... */
- if (del_timer(&ct->timeout))
- death_by_timeout((unsigned long)ct);
- /* ... else the timer will get him soon. */
-
- ip_conntrack_put(ct);
- }
-}
-
-/* Fast function for those who don't want to parse /proc (and I don't
- blame them). */
-/* Reversing the socket's dst/src point of view gives us the reply
- mapping. */
-static int
-getorigdst(struct sock *sk, int optval, void __user *user, int *len)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
-
- IP_CT_TUPLE_U_BLANK(&tuple);
- tuple.src.ip = inet->rcv_saddr;
- tuple.src.u.tcp.port = inet->sport;
- tuple.dst.ip = inet->daddr;
- tuple.dst.u.tcp.port = inet->dport;
- tuple.dst.protonum = IPPROTO_TCP;
-
- /* We only do TCP at the moment: is there a better way? */
- if (strcmp(sk->sk_prot->name, "TCP")) {
- DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
- return -ENOPROTOOPT;
- }
-
- if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
- DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
- *len, sizeof(struct sockaddr_in));
- return -EINVAL;
- }
-
- h = ip_conntrack_find_get(&tuple, NULL);
- if (h) {
- struct sockaddr_in sin;
- struct ip_conntrack *ct = tuplehash_to_ctrack(h);
-
- sin.sin_family = AF_INET;
- sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.u.tcp.port;
- sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.ip;
- memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
-
- DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
- ip_conntrack_put(ct);
- if (copy_to_user(user, &sin, sizeof(sin)) != 0)
- return -EFAULT;
- else
- return 0;
- }
- DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
- NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
- NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
- return -ENOENT;
-}
-
-static struct nf_sockopt_ops so_getorigdst = {
- .pf = PF_INET,
- .get_optmin = SO_ORIGINAL_DST,
- .get_optmax = SO_ORIGINAL_DST+1,
- .get = &getorigdst,
-};
-
-static int kill_all(struct ip_conntrack *i, void *data)
-{
- return 1;
-}
-
-void ip_conntrack_flush(void)
-{
- ip_ct_iterate_cleanup(kill_all, NULL);
-}
-
-static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
-{
- if (vmalloced)
- vfree(hash);
- else
- free_pages((unsigned long)hash,
- get_order(sizeof(struct list_head) * size));
-}
-
-/* Mishearing the voices in his head, our hero wonders how he's
- supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
- rcu_assign_pointer(ip_ct_attach, NULL);
-
- /* This makes sure all current packets have passed through
- netfilter framework. Roll on, two-stage module
- delete... */
- synchronize_net();
-
- ip_ct_event_cache_flush();
- i_see_dead_people:
- ip_conntrack_flush();
- if (atomic_read(&ip_conntrack_count) != 0) {
- schedule();
- goto i_see_dead_people;
- }
- /* wait until all references to ip_conntrack_untracked are dropped */
- while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
- schedule();
-
- kmem_cache_destroy(ip_conntrack_cachep);
- kmem_cache_destroy(ip_conntrack_expect_cachep);
- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
- ip_conntrack_htable_size);
- nf_unregister_sockopt(&so_getorigdst);
-}
-
-static struct list_head *alloc_hashtable(int size, int *vmalloced)
-{
- struct list_head *hash;
- unsigned int i;
-
- *vmalloced = 0;
- hash = (void*)__get_free_pages(GFP_KERNEL,
- get_order(sizeof(struct list_head)
- * size));
- if (!hash) {
- *vmalloced = 1;
- printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
- hash = vmalloc(sizeof(struct list_head) * size);
- }
-
- if (hash)
- for (i = 0; i < size; i++)
- INIT_LIST_HEAD(&hash[i]);
-
- return hash;
-}
-
-static int set_hashsize(const char *val, struct kernel_param *kp)
-{
- int i, bucket, hashsize, vmalloced;
- int old_vmalloced, old_size;
- int rnd;
- struct list_head *hash, *old_hash;
- struct ip_conntrack_tuple_hash *h;
-
- /* On boot, we can set this without any fancy locking. */
- if (!ip_conntrack_htable_size)
- return param_set_int(val, kp);
-
- hashsize = simple_strtol(val, NULL, 0);
- if (!hashsize)
- return -EINVAL;
-
- hash = alloc_hashtable(hashsize, &vmalloced);
- if (!hash)
- return -ENOMEM;
-
- /* We have to rehash for the new table anyway, so we also can
- * use a new random seed */
- get_random_bytes(&rnd, 4);
-
- write_lock_bh(&ip_conntrack_lock);
- for (i = 0; i < ip_conntrack_htable_size; i++) {
- while (!list_empty(&ip_conntrack_hash[i])) {
- h = list_entry(ip_conntrack_hash[i].next,
- struct ip_conntrack_tuple_hash, list);
- list_del(&h->list);
- bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
- list_add_tail(&h->list, &hash[bucket]);
- }
- }
- old_size = ip_conntrack_htable_size;
- old_vmalloced = ip_conntrack_vmalloc;
- old_hash = ip_conntrack_hash;
-
- ip_conntrack_htable_size = hashsize;
- ip_conntrack_vmalloc = vmalloced;
- ip_conntrack_hash = hash;
- ip_conntrack_hash_rnd = rnd;
- write_unlock_bh(&ip_conntrack_lock);
-
- free_conntrack_hash(old_hash, old_vmalloced, old_size);
- return 0;
-}
-
-module_param_call(hashsize, set_hashsize, param_get_uint,
- &ip_conntrack_htable_size, 0600);
-
-int __init ip_conntrack_init(void)
-{
- unsigned int i;
- int ret;
-
- /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
- * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
- if (!ip_conntrack_htable_size) {
- ip_conntrack_htable_size
- = (((num_physpages << PAGE_SHIFT) / 16384)
- / sizeof(struct list_head));
- if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
- ip_conntrack_htable_size = 8192;
- if (ip_conntrack_htable_size < 16)
- ip_conntrack_htable_size = 16;
- }
- ip_conntrack_max = 8 * ip_conntrack_htable_size;
-
- printk("ip_conntrack version %s (%u buckets, %d max)"
- " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
- ip_conntrack_htable_size, ip_conntrack_max,
- sizeof(struct ip_conntrack));
-
- ret = nf_register_sockopt(&so_getorigdst);
- if (ret != 0) {
- printk(KERN_ERR "Unable to register netfilter socket option\n");
- return ret;
- }
-
- ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
- &ip_conntrack_vmalloc);
- if (!ip_conntrack_hash) {
- printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
- goto err_unreg_sockopt;
- }
-
- ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
- sizeof(struct ip_conntrack), 0,
- 0, NULL, NULL);
- if (!ip_conntrack_cachep) {
- printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
- goto err_free_hash;
- }
-
- ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
- sizeof(struct ip_conntrack_expect),
- 0, 0, NULL, NULL);
- if (!ip_conntrack_expect_cachep) {
- printk(KERN_ERR "Unable to create ip_expect slab cache\n");
- goto err_free_conntrack_slab;
- }
-
- /* Don't NEED lock here, but good form anyway. */
- write_lock_bh(&ip_conntrack_lock);
- for (i = 0; i < MAX_IP_CT_PROTO; i++)
- rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
- /* Sew in builtin protocols. */
- rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
- rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
- rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
- write_unlock_bh(&ip_conntrack_lock);
-
- /* For use by ipt_REJECT */
- rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);
-
- /* Set up fake conntrack:
- - to never be deleted, not in any hashes */
- atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
- /* - and look it like as a confirmed connection */
- set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
-
- return ret;
-
-err_free_conntrack_slab:
- kmem_cache_destroy(ip_conntrack_cachep);
-err_free_hash:
- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
- ip_conntrack_htable_size);
-err_unreg_sockopt:
- nf_unregister_sockopt(&so_getorigdst);
-
- return -ENOMEM;
-}
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
deleted file mode 100644
index 1faa68ab9432..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/* FTP extension for IP connection tracking. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/ctype.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp connection tracking helper");
-
-/* This is slow, but it's simple. --RR */
-static char *ftp_buffer;
-static DEFINE_SPINLOCK(ip_ftp_lock);
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-
-static int loose;
-module_param(loose, bool, 0600);
-
-unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- enum ip_ct_ftp_type type,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp,
- u32 *seq);
-EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int try_rfc959(const char *, size_t, u_int32_t [], char);
-static int try_eprt(const char *, size_t, u_int32_t [], char);
-static int try_epsv_response(const char *, size_t, u_int32_t [], char);
-
-static const struct ftp_search {
- const char *pattern;
- size_t plen;
- char skip;
- char term;
- enum ip_ct_ftp_type ftptype;
- int (*getnum)(const char *, size_t, u_int32_t[], char);
-} search[IP_CT_DIR_MAX][2] = {
- [IP_CT_DIR_ORIGINAL] = {
- {
- .pattern = "PORT",
- .plen = sizeof("PORT") - 1,
- .skip = ' ',
- .term = '\r',
- .ftptype = IP_CT_FTP_PORT,
- .getnum = try_rfc959,
- },
- {
- .pattern = "EPRT",
- .plen = sizeof("EPRT") - 1,
- .skip = ' ',
- .term = '\r',
- .ftptype = IP_CT_FTP_EPRT,
- .getnum = try_eprt,
- },
- },
- [IP_CT_DIR_REPLY] = {
- {
- .pattern = "227 ",
- .plen = sizeof("227 ") - 1,
- .skip = '(',
- .term = ')',
- .ftptype = IP_CT_FTP_PASV,
- .getnum = try_rfc959,
- },
- {
- .pattern = "229 ",
- .plen = sizeof("229 ") - 1,
- .skip = '(',
- .term = ')',
- .ftptype = IP_CT_FTP_EPSV,
- .getnum = try_epsv_response,
- },
- },
-};
-
-static int try_number(const char *data, size_t dlen, u_int32_t array[],
- int array_size, char sep, char term)
-{
- u_int32_t i, len;
-
- memset(array, 0, sizeof(array[0])*array_size);
-
- /* Keep data pointing at next char. */
- for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
- if (*data >= '0' && *data <= '9') {
- array[i] = array[i]*10 + *data - '0';
- }
- else if (*data == sep)
- i++;
- else {
- /* Unexpected character; true if it's the
- terminator and we're finished. */
- if (*data == term && i == array_size - 1)
- return len;
-
- DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
- len, i, *data);
- return 0;
- }
- }
- DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
-
- return 0;
-}
-
-/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
-static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6],
- char term)
-{
- return try_number(data, dlen, array, 6, ',', term);
-}
-
-/* Grab port: number up to delimiter */
-static int get_port(const char *data, int start, size_t dlen, char delim,
- u_int32_t array[2])
-{
- u_int16_t port = 0;
- int i;
-
- for (i = start; i < dlen; i++) {
- /* Finished? */
- if (data[i] == delim) {
- if (port == 0)
- break;
- array[0] = port >> 8;
- array[1] = port;
- return i + 1;
- }
- else if (data[i] >= '0' && data[i] <= '9')
- port = port*10 + data[i] - '0';
- else /* Some other crap */
- break;
- }
- return 0;
-}
-
-/* Returns 0, or length of numbers: |1|132.235.1.2|6275| */
-static int try_eprt(const char *data, size_t dlen, u_int32_t array[6],
- char term)
-{
- char delim;
- int length;
-
- /* First character is delimiter, then "1" for IPv4, then
- delimiter again. */
- if (dlen <= 3) return 0;
- delim = data[0];
- if (isdigit(delim) || delim < 33 || delim > 126
- || data[1] != '1' || data[2] != delim)
- return 0;
-
- DEBUGP("EPRT: Got |1|!\n");
- /* Now we have IP address. */
- length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
- if (length == 0)
- return 0;
-
- DEBUGP("EPRT: Got IP address!\n");
- /* Start offset includes initial "|1|", and trailing delimiter */
- return get_port(data, 3 + length + 1, dlen, delim, array+4);
-}
-
-/* Returns 0, or length of numbers: |||6446| */
-static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6],
- char term)
-{
- char delim;
-
- /* Three delimiters. */
- if (dlen <= 3) return 0;
- delim = data[0];
- if (isdigit(delim) || delim < 33 || delim > 126
- || data[1] != delim || data[2] != delim)
- return 0;
-
- return get_port(data, 3, dlen, delim, array+4);
-}
-
-/* Return 1 for match, 0 for accept, -1 for partial. */
-static int find_pattern(const char *data, size_t dlen,
- const char *pattern, size_t plen,
- char skip, char term,
- unsigned int *numoff,
- unsigned int *numlen,
- u_int32_t array[6],
- int (*getnum)(const char *, size_t, u_int32_t[], char))
-{
- size_t i;
-
- DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
- if (dlen == 0)
- return 0;
-
- if (dlen <= plen) {
- /* Short packet: try for partial? */
- if (strnicmp(data, pattern, dlen) == 0)
- return -1;
- else return 0;
- }
-
- if (strnicmp(data, pattern, plen) != 0) {
-#if 0
- size_t i;
-
- DEBUGP("ftp: string mismatch\n");
- for (i = 0; i < plen; i++) {
- DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
- i, data[i], data[i],
- pattern[i], pattern[i]);
- }
-#endif
- return 0;
- }
-
- DEBUGP("Pattern matches!\n");
- /* Now we've found the constant string, try to skip
- to the 'skip' character */
- for (i = plen; data[i] != skip; i++)
- if (i == dlen - 1) return -1;
-
- /* Skip over the last character */
- i++;
-
- DEBUGP("Skipped up to `%c'!\n", skip);
-
- *numoff = i;
- *numlen = getnum(data + i, dlen - i, array, term);
- if (!*numlen)
- return -1;
-
- DEBUGP("Match succeeded!\n");
- return 1;
-}
-
-/* Look up to see if we're just after a \n. */
-static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
-{
- unsigned int i;
-
- for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
- if (info->seq_aft_nl[dir][i] == seq)
- return 1;
- return 0;
-}
-
-/* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
- struct sk_buff *skb)
-{
- unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
-
- /* Look for oldest: if we find exact match, we're done. */
- for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
- if (info->seq_aft_nl[dir][i] == nl_seq)
- return;
-
- if (oldest == info->seq_aft_nl_num[dir]
- || before(info->seq_aft_nl[dir][i], oldest))
- oldest = i;
- }
-
- if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
- info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
- ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
- } else if (oldest != NUM_SEQ_TO_REMEMBER) {
- info->seq_aft_nl[dir][oldest] = nl_seq;
- ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
- }
-}
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dataoff, datalen;
- struct tcphdr _tcph, *th;
- char *fb_ptr;
- int ret;
- u32 seq, array[6] = { 0 };
- int dir = CTINFO2DIR(ctinfo);
- unsigned int matchlen, matchoff;
- struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
- struct ip_conntrack_expect *exp;
- unsigned int i;
- int found = 0, ends_in_nl;
- typeof(ip_nat_ftp_hook) ip_nat_ftp;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
- DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
- return NF_ACCEPT;
- }
-
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return NF_ACCEPT;
-
- dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
- /* No data? */
- if (dataoff >= (*pskb)->len) {
- DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
- return NF_ACCEPT;
- }
- datalen = (*pskb)->len - dataoff;
-
- spin_lock_bh(&ip_ftp_lock);
- fb_ptr = skb_header_pointer(*pskb, dataoff,
- (*pskb)->len - dataoff, ftp_buffer);
- BUG_ON(fb_ptr == NULL);
-
- ends_in_nl = (fb_ptr[datalen - 1] == '\n');
- seq = ntohl(th->seq) + datalen;
-
- /* Look up to see if we're just after a \n. */
- if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
- /* Now if this ends in \n, update ftp info. */
- DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
- ct_ftp_info->seq_aft_nl[0][dir]
- old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl);
- ret = NF_ACCEPT;
- goto out_update_nl;
- }
-
- /* Initialize IP array to expected address (it's not mentioned
- in EPSV responses) */
- array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF;
- array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF;
- array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF;
- array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF;
-
- for (i = 0; i < ARRAY_SIZE(search[dir]); i++) {
- found = find_pattern(fb_ptr, (*pskb)->len - dataoff,
- search[dir][i].pattern,
- search[dir][i].plen,
- search[dir][i].skip,
- search[dir][i].term,
- &matchoff, &matchlen,
- array,
- search[dir][i].getnum);
- if (found) break;
- }
- if (found == -1) {
- /* We don't usually drop packets. After all, this is
- connection tracking, not packet filtering.
- However, it is necessary for accurate tracking in
- this case. */
- if (net_ratelimit())
- printk("conntrack_ftp: partial %s %u+%u\n",
- search[dir][i].pattern,
- ntohl(th->seq), datalen);
- ret = NF_DROP;
- goto out;
- } else if (found == 0) { /* No match */
- ret = NF_ACCEPT;
- goto out_update_nl;
- }
-
- DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n",
- fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff);
-
- /* Allocate expectation which will be inserted */
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL) {
- ret = NF_DROP;
- goto out;
- }
-
- /* We refer to the reverse direction ("!dir") tuples here,
- * because we're expecting something in the other direction.
- * Doesn't matter unless NAT is happening. */
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-
- if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
- != ct->tuplehash[dir].tuple.src.ip) {
- /* Enrico Scholz's passive FTP to partially RNAT'd ftp
- server: it really wants us to connect to a
- different IP address. Simply don't record it for
- NAT. */
- DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
- array[0], array[1], array[2], array[3],
- NIPQUAD(ct->tuplehash[dir].tuple.src.ip));
-
- /* Thanks to Cristiano Lincoln Mattos
- <lincoln@cesar.org.br> for reporting this potential
- problem (DMZ machines opening holes to internal
- networks, or the packet filter itself). */
- if (!loose) {
- ret = NF_ACCEPT;
- goto out_put_expect;
- }
- exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
- | (array[2] << 8) | array[3]);
- }
-
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]);
- exp->tuple.src.u.tcp.port = 0; /* Don't care. */
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask = ((struct ip_conntrack_tuple)
- { { htonl(0xFFFFFFFF), { 0 } },
- { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }});
-
- exp->expectfn = NULL;
- exp->flags = 0;
-
- /* Now, NAT might want to mangle the packet, and register the
- * (possibly changed) expectation itself. */
- ip_nat_ftp = rcu_dereference(ip_nat_ftp_hook);
- if (ip_nat_ftp)
- ret = ip_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
- matchoff, matchlen, exp, &seq);
- else {
- /* Can't expect this? Best to drop packet now. */
- if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- else
- ret = NF_ACCEPT;
- }
-
-out_put_expect:
- ip_conntrack_expect_put(exp);
-
-out_update_nl:
- /* Now if this ends in \n, update ftp info. Seq may have been
- * adjusted by NAT code. */
- if (ends_in_nl)
- update_nl_seq(seq, ct_ftp_info,dir, *pskb);
- out:
- spin_unlock_bh(&ip_ftp_lock);
- return ret;
-}
-
-static struct ip_conntrack_helper ftp[MAX_PORTS];
-static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
-
-/* Not __exit: called from init() */
-static void ip_conntrack_ftp_fini(void)
-{
- int i;
- for (i = 0; i < ports_c; i++) {
- DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
- ports[i]);
- ip_conntrack_helper_unregister(&ftp[i]);
- }
-
- kfree(ftp_buffer);
-}
-
-static int __init ip_conntrack_ftp_init(void)
-{
- int i, ret;
- char *tmpname;
-
- ftp_buffer = kmalloc(65536, GFP_KERNEL);
- if (!ftp_buffer)
- return -ENOMEM;
-
- if (ports_c == 0)
- ports[ports_c++] = FTP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
- ftp[i].tuple.dst.protonum = IPPROTO_TCP;
- ftp[i].mask.src.u.tcp.port = htons(0xFFFF);
- ftp[i].mask.dst.protonum = 0xFF;
- ftp[i].max_expected = 1;
- ftp[i].timeout = 5 * 60; /* 5 minutes */
- ftp[i].me = THIS_MODULE;
- ftp[i].help = help;
-
- tmpname = &ftp_names[i][0];
- if (ports[i] == FTP_PORT)
- sprintf(tmpname, "ftp");
- else
- sprintf(tmpname, "ftp-%d", ports[i]);
- ftp[i].name = tmpname;
-
- DEBUGP("ip_ct_ftp: registering helper for port %d\n",
- ports[i]);
- ret = ip_conntrack_helper_register(&ftp[i]);
-
- if (ret) {
- ip_conntrack_ftp_fini();
- return ret;
- }
- }
- return 0;
-}
-
-module_init(ip_conntrack_ftp_init);
-module_exit(ip_conntrack_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
deleted file mode 100644
index 53eb365ccc7e..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ /dev/null
@@ -1,1841 +0,0 @@
-/*
- * H.323 connection tracking helper
- *
- * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
- *
- * This source code is licensed under General Public License version 2.
- *
- * Based on the 'brute force' H.323 connection tracking module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * For more information, please see http://nath323.sourceforge.net/
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
-#include <linux/moduleparam.h>
-#include <linux/ctype.h>
-#include <linux/inet.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Parameters */
-static unsigned int default_rrq_ttl = 300;
-module_param(default_rrq_ttl, uint, 0600);
-MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ");
-
-static int gkrouted_only = 1;
-module_param(gkrouted_only, int, 0600);
-MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper");
-
-static int callforward_filter = 1;
-module_param(callforward_filter, bool, 0600);
-MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
- "if both endpoints are on different sides "
- "(determined by routing information)");
-
-/* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff ** pskb,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr,
- __be32 ip, u_int16_t port);
-int (*set_h225_addr_hook) (struct sk_buff ** pskb,
- unsigned char **data, int dataoff,
- TransportAddress * addr,
- __be32 ip, u_int16_t port);
-int (*set_sig_addr_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count);
-int (*set_ras_addr_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count);
-int (*nat_rtp_rtcp_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr,
- u_int16_t port, u_int16_t rtp_port,
- struct ip_conntrack_expect * rtp_exp,
- struct ip_conntrack_expect * rtcp_exp);
-int (*nat_t120_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect * exp);
-int (*nat_h245_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect * exp);
-int (*nat_callforwarding_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect * exp);
-int (*nat_q931_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, TransportAddress * addr, int idx,
- u_int16_t port, struct ip_conntrack_expect * exp);
-
-
-static DEFINE_SPINLOCK(ip_h323_lock);
-static char *h323_buffer;
-
-/****************************************************************************/
-static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int *datalen, int *dataoff)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- struct tcphdr _tcph, *th;
- int tcpdatalen;
- int tcpdataoff;
- unsigned char *tpkt;
- int tpktlen;
- int tpktoff;
-
- /* Get TCP header */
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return 0;
-
- /* Get TCP data offset */
- tcpdataoff = (*pskb)->nh.iph->ihl * 4 + th->doff * 4;
-
- /* Get TCP data length */
- tcpdatalen = (*pskb)->len - tcpdataoff;
- if (tcpdatalen <= 0) /* No TCP data */
- goto clear_out;
-
- if (*data == NULL) { /* first TPKT */
- /* Get first TPKT pointer */
- tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
- h323_buffer);
- BUG_ON(tpkt == NULL);
-
- /* Validate TPKT identifier */
- if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
- /* Netmeeting sends TPKT header and data separately */
- if (info->tpkt_len[dir] > 0) {
- DEBUGP("ip_ct_h323: previous packet "
- "indicated separate TPKT data of %hu "
- "bytes\n", info->tpkt_len[dir]);
- if (info->tpkt_len[dir] <= tcpdatalen) {
- /* Yes, there was a TPKT header
- * received */
- *data = tpkt;
- *datalen = info->tpkt_len[dir];
- *dataoff = 0;
- goto out;
- }
-
- /* Fragmented TPKT */
- if (net_ratelimit())
- printk("ip_ct_h323: "
- "fragmented TPKT\n");
- goto clear_out;
- }
-
- /* It is not even a TPKT */
- return 0;
- }
- tpktoff = 0;
- } else { /* Next TPKT */
- tpktoff = *dataoff + *datalen;
- tcpdatalen -= tpktoff;
- if (tcpdatalen <= 4) /* No more TPKT */
- goto clear_out;
- tpkt = *data + *datalen;
-
- /* Validate TPKT identifier */
- if (tpkt[0] != 0x03 || tpkt[1] != 0)
- goto clear_out;
- }
-
- /* Validate TPKT length */
- tpktlen = tpkt[2] * 256 + tpkt[3];
- if (tpktlen < 4)
- goto clear_out;
- if (tpktlen > tcpdatalen) {
- if (tcpdatalen == 4) { /* Separate TPKT header */
- /* Netmeeting sends TPKT header and data separately */
- DEBUGP("ip_ct_h323: separate TPKT header indicates "
- "there will be TPKT data of %hu bytes\n",
- tpktlen - 4);
- info->tpkt_len[dir] = tpktlen - 4;
- return 0;
- }
-
- if (net_ratelimit())
- printk("ip_ct_h323: incomplete TPKT (fragmented?)\n");
- goto clear_out;
- }
-
- /* This is the encapsulated data */
- *data = tpkt + 4;
- *datalen = tpktlen - 4;
- *dataoff = tpktoff + 4;
-
- out:
- /* Clear TPKT length */
- info->tpkt_len[dir] = 0;
- return 1;
-
- clear_out:
- info->tpkt_len[dir] = 0;
- return 0;
-}
-
-/****************************************************************************/
-static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr,
- __be32 * ip, u_int16_t * port)
-{
- unsigned char *p;
-
- if (addr->choice != eH245_TransportAddress_unicastAddress ||
- addr->unicastAddress.choice != eUnicastAddress_iPAddress)
- return 0;
-
- p = data + addr->unicastAddress.iPAddress.network;
- *ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
- *port = (p[4] << 8) | (p[5]);
-
- return 1;
-}
-
-/****************************************************************************/
-static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- u_int16_t rtp_port;
- struct ip_conntrack_expect *rtp_exp;
- struct ip_conntrack_expect *rtcp_exp;
- typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
-
- /* Read RTP or RTCP address */
- if (!get_h245_addr(*data, addr, &ip, &port) ||
- ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
- return 0;
-
- /* RTP port is even */
- rtp_port = port & (~1);
-
- /* Create expect for RTP */
- if ((rtp_exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- rtp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- rtp_exp->tuple.src.u.udp.port = 0;
- rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- rtp_exp->tuple.dst.u.udp.port = htons(rtp_port);
- rtp_exp->tuple.dst.protonum = IPPROTO_UDP;
- rtp_exp->mask.src.ip = htonl(0xFFFFFFFF);
- rtp_exp->mask.src.u.udp.port = 0;
- rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
- rtp_exp->mask.dst.u.udp.port = htons(0xFFFF);
- rtp_exp->mask.dst.protonum = 0xFF;
- rtp_exp->flags = 0;
-
- /* Create expect for RTCP */
- if ((rtcp_exp = ip_conntrack_expect_alloc(ct)) == NULL) {
- ip_conntrack_expect_put(rtp_exp);
- return -1;
- }
- rtcp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- rtcp_exp->tuple.src.u.udp.port = 0;
- rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1);
- rtcp_exp->tuple.dst.protonum = IPPROTO_UDP;
- rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF);
- rtcp_exp->mask.src.u.udp.port = 0;
- rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
- rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF);
- rtcp_exp->mask.dst.protonum = 0xFF;
- rtcp_exp->flags = 0;
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip &&
- (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook))) {
- /* NAT needed */
- ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- addr, port, rtp_port, rtp_exp, rtcp_exp);
- } else { /* Conntrack only */
- rtp_exp->expectfn = NULL;
- rtcp_exp->expectfn = NULL;
-
- if (ip_conntrack_expect_related(rtp_exp) == 0) {
- if (ip_conntrack_expect_related(rtcp_exp) == 0) {
- DEBUGP("ip_ct_h323: expect RTP "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(rtp_exp->tuple.src.ip),
- ntohs(rtp_exp->tuple.src.u.udp.port),
- NIPQUAD(rtp_exp->tuple.dst.ip),
- ntohs(rtp_exp->tuple.dst.u.udp.port));
- DEBUGP("ip_ct_h323: expect RTCP "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(rtcp_exp->tuple.src.ip),
- ntohs(rtcp_exp->tuple.src.u.udp.port),
- NIPQUAD(rtcp_exp->tuple.dst.ip),
- ntohs(rtcp_exp->tuple.dst.u.udp.port));
- } else {
- ip_conntrack_unexpect_related(rtp_exp);
- ret = -1;
- }
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(rtp_exp);
- ip_conntrack_expect_put(rtcp_exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int expect_t120(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp = NULL;
- typeof(nat_t120_hook) nat_t120;
-
- /* Read T.120 address */
- if (!get_h245_addr(*data, addr, &ip, &port) ||
- ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
- return 0;
-
- /* Create expect for T.120 connections */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip &&
- (nat_t120 = rcu_dereference(nat_t120_hook))) {
- /* NAT needed */
- ret = nat_t120(pskb, ct, ctinfo, data, dataoff, addr,
- port, exp);
- } else { /* Conntrack only */
- exp->expectfn = NULL;
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_h323: expect T.120 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_h245_channel(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H2250LogicalChannelParameters * channel)
-{
- int ret;
-
- if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
- /* RTP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- &channel->mediaChannel);
- if (ret < 0)
- return -1;
- }
-
- if (channel->
- options & eH2250LogicalChannelParameters_mediaControlChannel) {
- /* RTCP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- &channel->mediaControlChannel);
- if (ret < 0)
- return -1;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_olc(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- OpenLogicalChannel * olc)
-{
- int ret;
-
- DEBUGP("ip_ct_h323: OpenLogicalChannel\n");
-
- if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
- eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
- {
- ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
- &olc->
- forwardLogicalChannelParameters.
- multiplexParameters.
- h2250LogicalChannelParameters);
- if (ret < 0)
- return -1;
- }
-
- if ((olc->options &
- eOpenLogicalChannel_reverseLogicalChannelParameters) &&
- (olc->reverseLogicalChannelParameters.options &
- eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters)
- && (olc->reverseLogicalChannelParameters.multiplexParameters.
- choice ==
- eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
- {
- ret =
- process_h245_channel(pskb, ct, ctinfo, data, dataoff,
- &olc->
- reverseLogicalChannelParameters.
- multiplexParameters.
- h2250LogicalChannelParameters);
- if (ret < 0)
- return -1;
- }
-
- if ((olc->options & eOpenLogicalChannel_separateStack) &&
- olc->forwardLogicalChannelParameters.dataType.choice ==
- eDataType_data &&
- olc->forwardLogicalChannelParameters.dataType.data.application.
- choice == eDataApplicationCapability_application_t120 &&
- olc->forwardLogicalChannelParameters.dataType.data.application.
- t120.choice == eDataProtocolCapability_separateLANStack &&
- olc->separateStack.networkAddress.choice ==
- eNetworkAccessParameters_networkAddress_localAreaAddress) {
- ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
- &olc->separateStack.networkAddress.
- localAreaAddress);
- if (ret < 0)
- return -1;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_olca(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- OpenLogicalChannelAck * olca)
-{
- H2250LogicalChannelAckParameters *ack;
- int ret;
-
- DEBUGP("ip_ct_h323: OpenLogicalChannelAck\n");
-
- if ((olca->options &
- eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
- (olca->reverseLogicalChannelParameters.options &
- eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters)
- && (olca->reverseLogicalChannelParameters.multiplexParameters.
- choice ==
- eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
- {
- ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
- &olca->
- reverseLogicalChannelParameters.
- multiplexParameters.
- h2250LogicalChannelParameters);
- if (ret < 0)
- return -1;
- }
-
- if ((olca->options &
- eOpenLogicalChannelAck_forwardMultiplexAckParameters) &&
- (olca->forwardMultiplexAckParameters.choice ==
- eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters))
- {
- ack = &olca->forwardMultiplexAckParameters.
- h2250LogicalChannelAckParameters;
- if (ack->options &
- eH2250LogicalChannelAckParameters_mediaChannel) {
- /* RTP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- &ack->mediaChannel);
- if (ret < 0)
- return -1;
- }
-
- if (ack->options &
- eH2250LogicalChannelAckParameters_mediaControlChannel) {
- /* RTCP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- &ack->mediaControlChannel);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- MultimediaSystemControlMessage * mscm)
-{
- switch (mscm->choice) {
- case eMultimediaSystemControlMessage_request:
- if (mscm->request.choice ==
- eRequestMessage_openLogicalChannel) {
- return process_olc(pskb, ct, ctinfo, data, dataoff,
- &mscm->request.openLogicalChannel);
- }
- DEBUGP("ip_ct_h323: H.245 Request %d\n",
- mscm->request.choice);
- break;
- case eMultimediaSystemControlMessage_response:
- if (mscm->response.choice ==
- eResponseMessage_openLogicalChannelAck) {
- return process_olca(pskb, ct, ctinfo, data, dataoff,
- &mscm->response.
- openLogicalChannelAck);
- }
- DEBUGP("ip_ct_h323: H.245 Response %d\n",
- mscm->response.choice);
- break;
- default:
- DEBUGP("ip_ct_h323: H.245 signal %d\n", mscm->choice);
- break;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int h245_help(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- static MultimediaSystemControlMessage mscm;
- unsigned char *data = NULL;
- int datalen;
- int dataoff;
- int ret;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
- return NF_ACCEPT;
- }
- DEBUGP("ip_ct_h245: skblen = %u\n", (*pskb)->len);
-
- spin_lock_bh(&ip_h323_lock);
-
- /* Process each TPKT */
- while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
- DEBUGP("ip_ct_h245: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr), datalen);
-
- /* Decode H.245 signal */
- ret = DecodeMultimediaSystemControlMessage(data, datalen,
- &mscm);
- if (ret < 0) {
- if (net_ratelimit())
- printk("ip_ct_h245: decoding error: %s\n",
- ret == H323_ERROR_BOUND ?
- "out of bound" : "out of range");
- /* We don't drop when decoding error */
- break;
- }
-
- /* Process H.245 signal */
- if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
- goto drop;
- }
-
- spin_unlock_bh(&ip_h323_lock);
- return NF_ACCEPT;
-
- drop:
- spin_unlock_bh(&ip_h323_lock);
- if (net_ratelimit())
- printk("ip_ct_h245: packet dropped\n");
- return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_h245 = {
- .name = "H.245",
- .me = THIS_MODULE,
- .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */ ,
- .timeout = 240,
- .tuple = {.dst = {.protonum = IPPROTO_TCP}},
- .mask = {.src = {.u = {0xFFFF}},
- .dst = {.protonum = 0xFF}},
- .help = h245_help
-};
-
-/****************************************************************************/
-void ip_conntrack_h245_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- write_lock_bh(&ip_conntrack_lock);
- new->helper = &ip_conntrack_helper_h245;
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-int get_h225_addr(unsigned char *data, TransportAddress * addr,
- __be32 * ip, u_int16_t * port)
-{
- unsigned char *p;
-
- if (addr->choice != eTransportAddress_ipAddress)
- return 0;
-
- p = data + addr->ipAddress.ip;
- *ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
- *port = (p[4] << 8) | (p[5]);
-
- return 1;
-}
-
-/****************************************************************************/
-static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp = NULL;
- typeof(nat_h245_hook) nat_h245;
-
- /* Read h245Address */
- if (!get_h225_addr(*data, addr, &ip, &port) ||
- ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
- return 0;
-
- /* Create expect for h245 connection */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = 0;
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip &&
- (nat_h245 = rcu_dereference(nat_h245_hook))) {
- /* NAT needed */
- ret = nat_h245(pskb, ct, ctinfo, data, dataoff, addr,
- port, exp);
- } else { /* Conntrack only */
- exp->expectfn = ip_conntrack_h245_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_q931: expect H.245 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/* Forwarding declaration */
-void ip_conntrack_q931_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this);
-
-/****************************************************************************/
-static int expect_callforwarding(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp = NULL;
- typeof(nat_callforwarding_hook) nat_callforwarding;
-
- /* Read alternativeAddress */
- if (!get_h225_addr(*data, addr, &ip, &port) || port == 0)
- return 0;
-
- /* If the calling party is on the same side of the forward-to party,
- * we don't need to track the second call */
- if (callforward_filter) {
- struct rtable *rt1, *rt2;
- struct flowi fl1 = {
- .fl4_dst = ip,
- };
- struct flowi fl2 = {
- .fl4_dst = ct->tuplehash[!dir].tuple.src.ip,
- };
-
- if (ip_route_output_key(&rt1, &fl1) == 0) {
- if (ip_route_output_key(&rt2, &fl2) == 0) {
- if (rt1->rt_gateway == rt2->rt_gateway &&
- rt1->u.dst.dev == rt2->u.dst.dev)
- ret = 1;
- dst_release(&rt2->u.dst);
- }
- dst_release(&rt1->u.dst);
- }
- if (ret) {
- DEBUGP("ip_ct_q931: Call Forwarding not tracked\n");
- return 0;
- }
- }
-
- /* Create expect for the second call leg */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = 0;
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip &&
- (nat_callforwarding = rcu_dereference(nat_callforwarding_hook))) {
- /* Need NAT */
- ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
- addr, port, exp);
- } else { /* Conntrack only */
- exp->expectfn = ip_conntrack_q931_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_q931: expect Call Forwarding "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Setup_UUIE * setup)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret;
- int i;
- __be32 ip;
- u_int16_t port;
- typeof(set_h225_addr_hook) set_h225_addr;
-
- DEBUGP("ip_ct_q931: Setup\n");
-
- if (setup->options & eSetup_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &setup->h245Address);
- if (ret < 0)
- return -1;
- }
-
- set_h225_addr = rcu_dereference(set_h225_addr_hook);
-
- if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
- (set_h225_addr) &&
- get_h225_addr(*data, &setup->destCallSignalAddress, &ip, &port) &&
- ip != ct->tuplehash[!dir].tuple.src.ip) {
- DEBUGP("ip_ct_q931: set destCallSignalAddress "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.src.ip),
- ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
- ret = set_h225_addr(pskb, data, dataoff,
- &setup->destCallSignalAddress,
- ct->tuplehash[!dir].tuple.src.ip,
- ntohs(ct->tuplehash[!dir].tuple.src.
- u.tcp.port));
- if (ret < 0)
- return -1;
- }
-
- if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
- (set_h225_addr) &&
- get_h225_addr(*data, &setup->sourceCallSignalAddress, &ip, &port)
- && ip != ct->tuplehash[!dir].tuple.dst.ip) {
- DEBUGP("ip_ct_q931: set sourceCallSignalAddress "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
- ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
- ret = set_h225_addr(pskb, data, dataoff,
- &setup->sourceCallSignalAddress,
- ct->tuplehash[!dir].tuple.dst.ip,
- ntohs(ct->tuplehash[!dir].tuple.dst.
- u.tcp.port));
- if (ret < 0)
- return -1;
- }
-
- if (setup->options & eSetup_UUIE_fastStart) {
- for (i = 0; i < setup->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &setup->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_callproceeding(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- CallProceeding_UUIE * callproc)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: CallProceeding\n");
-
- if (callproc->options & eCallProceeding_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &callproc->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (callproc->options & eCallProceeding_UUIE_fastStart) {
- for (i = 0; i < callproc->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &callproc->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_connect(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Connect_UUIE * connect)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Connect\n");
-
- if (connect->options & eConnect_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &connect->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (connect->options & eConnect_UUIE_fastStart) {
- for (i = 0; i < connect->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &connect->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_alerting(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Alerting_UUIE * alert)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Alerting\n");
-
- if (alert->options & eAlerting_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &alert->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (alert->options & eAlerting_UUIE_fastStart) {
- for (i = 0; i < alert->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &alert->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_information(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Information_UUIE * info)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Information\n");
-
- if (info->options & eInformation_UUIE_fastStart) {
- for (i = 0; i < info->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &info->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_facility(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Facility_UUIE * facility)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Facility\n");
-
- if (facility->reason.choice == eFacilityReason_callForwarded) {
- if (facility->options & eFacility_UUIE_alternativeAddress)
- return expect_callforwarding(pskb, ct, ctinfo, data,
- dataoff,
- &facility->
- alternativeAddress);
- return 0;
- }
-
- if (facility->options & eFacility_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &facility->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (facility->options & eFacility_UUIE_fastStart) {
- for (i = 0; i < facility->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &facility->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_progress(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Progress_UUIE * progress)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Progress\n");
-
- if (progress->options & eProgress_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &progress->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (progress->options & eProgress_UUIE_fastStart) {
- for (i = 0; i < progress->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &progress->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff, Q931 * q931)
-{
- H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
- int i;
- int ret = 0;
-
- switch (pdu->h323_message_body.choice) {
- case eH323_UU_PDU_h323_message_body_setup:
- ret = process_setup(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.setup);
- break;
- case eH323_UU_PDU_h323_message_body_callProceeding:
- ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.
- callProceeding);
- break;
- case eH323_UU_PDU_h323_message_body_connect:
- ret = process_connect(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.connect);
- break;
- case eH323_UU_PDU_h323_message_body_alerting:
- ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.alerting);
- break;
- case eH323_UU_PDU_h323_message_body_information:
- ret = process_information(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.
- information);
- break;
- case eH323_UU_PDU_h323_message_body_facility:
- ret = process_facility(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.facility);
- break;
- case eH323_UU_PDU_h323_message_body_progress:
- ret = process_progress(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.progress);
- break;
- default:
- DEBUGP("ip_ct_q931: Q.931 signal %d\n",
- pdu->h323_message_body.choice);
- break;
- }
-
- if (ret < 0)
- return -1;
-
- if (pdu->options & eH323_UU_PDU_h245Control) {
- for (i = 0; i < pdu->h245Control.count; i++) {
- ret = process_h245(pskb, ct, ctinfo, data, dataoff,
- &pdu->h245Control.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int q931_help(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- static Q931 q931;
- unsigned char *data = NULL;
- int datalen;
- int dataoff;
- int ret;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
- return NF_ACCEPT;
- }
- DEBUGP("ip_ct_q931: skblen = %u\n", (*pskb)->len);
-
- spin_lock_bh(&ip_h323_lock);
-
- /* Process each TPKT */
- while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
- DEBUGP("ip_ct_q931: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr), datalen);
-
- /* Decode Q.931 signal */
- ret = DecodeQ931(data, datalen, &q931);
- if (ret < 0) {
- if (net_ratelimit())
- printk("ip_ct_q931: decoding error: %s\n",
- ret == H323_ERROR_BOUND ?
- "out of bound" : "out of range");
- /* We don't drop when decoding error */
- break;
- }
-
- /* Process Q.931 signal */
- if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
- goto drop;
- }
-
- spin_unlock_bh(&ip_h323_lock);
- return NF_ACCEPT;
-
- drop:
- spin_unlock_bh(&ip_h323_lock);
- if (net_ratelimit())
- printk("ip_ct_q931: packet dropped\n");
- return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_q931 = {
- .name = "Q.931",
- .me = THIS_MODULE,
- .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4 /* T.120 and H.245 */ ,
- .timeout = 240,
- .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(Q931_PORT)}}},
- .dst = {.protonum = IPPROTO_TCP}},
- .mask = {.src = {.u = {0xFFFF}},
- .dst = {.protonum = 0xFF}},
- .help = q931_help
-};
-
-/****************************************************************************/
-void ip_conntrack_q931_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- write_lock_bh(&ip_conntrack_lock);
- new->helper = &ip_conntrack_helper_q931;
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen)
-{
- struct udphdr _uh, *uh;
- int dataoff;
-
- uh = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4, sizeof(_uh),
- &_uh);
- if (uh == NULL)
- return NULL;
- dataoff = (*pskb)->nh.iph->ihl * 4 + sizeof(_uh);
- if (dataoff >= (*pskb)->len)
- return NULL;
- *datalen = (*pskb)->len - dataoff;
- return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
-}
-
-/****************************************************************************/
-static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
- __be32 ip, u_int16_t port)
-{
- struct ip_conntrack_expect *exp;
- struct ip_conntrack_tuple tuple;
-
- tuple.src.ip = 0;
- tuple.src.u.tcp.port = 0;
- tuple.dst.ip = ip;
- tuple.dst.u.tcp.port = htons(port);
- tuple.dst.protonum = IPPROTO_TCP;
-
- exp = __ip_conntrack_expect_find(&tuple);
- if (exp && exp->master == ct)
- return exp;
- return NULL;
-}
-
-/****************************************************************************/
-static int set_expect_timeout(struct ip_conntrack_expect *exp,
- unsigned timeout)
-{
- if (!exp || !del_timer(&exp->timeout))
- return 0;
-
- exp->timeout.expires = jiffies + timeout * HZ;
- add_timer(&exp->timeout);
-
- return 1;
-}
-
-/****************************************************************************/
-static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- int i;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp;
- typeof(nat_q931_hook) nat_q931;
-
- /* Look for the first related address */
- for (i = 0; i < count; i++) {
- if (get_h225_addr(*data, &addr[i], &ip, &port) &&
- ip == ct->tuplehash[dir].tuple.src.ip && port != 0)
- break;
- }
-
- if (i >= count) /* Not found */
- return 0;
-
- /* Create expect for Q.931 */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = gkrouted_only ? /* only accept calls from GK? */
- ct->tuplehash[!dir].tuple.src.ip : 0;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0;
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple calls */
-
- nat_q931 = rcu_dereference(nat_q931_hook);
- if (nat_q931) { /* Need NAT */
- ret = nat_q931(pskb, ct, ctinfo, data, addr, i, port, exp);
- } else { /* Conntrack only */
- exp->expectfn = ip_conntrack_q931_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_ras: expect Q.931 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
-
- /* Save port for looking up expect in processing RCF */
- info->sig_port[dir] = port;
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_grq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, GatekeeperRequest * grq)
-{
- typeof(set_ras_addr_hook) set_ras_addr;
-
- DEBUGP("ip_ct_ras: GRQ\n");
-
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr) /* NATed */
- return set_ras_addr(pskb, ct, ctinfo, data,
- &grq->rasAddress, 1);
- return 0;
-}
-
-/* Declare before using */
-static void ip_conntrack_ras_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this);
-
-/****************************************************************************/
-static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, GatekeeperConfirm * gcf)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp;
-
- DEBUGP("ip_ct_ras: GCF\n");
-
- if (!get_h225_addr(*data, &gcf->rasAddress, &ip, &port))
- return 0;
-
- /* Registration port is the same as discovery port */
- if (ip == ct->tuplehash[dir].tuple.src.ip &&
- port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port))
- return 0;
-
- /* Avoid RAS expectation loops. A GCF is never expected. */
- if (test_bit(IPS_EXPECTED_BIT, &ct->status))
- return 0;
-
- /* Need new expect */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_UDP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = 0;
- exp->expectfn = ip_conntrack_ras_expect;
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_ras: expect RAS "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, RegistrationRequest * rrq)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int ret;
- typeof(set_ras_addr_hook) set_ras_addr;
-
- DEBUGP("ip_ct_ras: RRQ\n");
-
- ret = expect_q931(pskb, ct, ctinfo, data,
- rrq->callSignalAddress.item,
- rrq->callSignalAddress.count);
- if (ret < 0)
- return -1;
-
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr) {
- ret = set_ras_addr(pskb, ct, ctinfo, data,
- rrq->rasAddress.item,
- rrq->rasAddress.count);
- if (ret < 0)
- return -1;
- }
-
- if (rrq->options & eRegistrationRequest_timeToLive) {
- DEBUGP("ip_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
- info->timeout = rrq->timeToLive;
- } else
- info->timeout = default_rrq_ttl;
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, RegistrationConfirm * rcf)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int ret;
- struct ip_conntrack_expect *exp;
- typeof(set_sig_addr_hook) set_sig_addr;
-
- DEBUGP("ip_ct_ras: RCF\n");
-
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
- rcf->callSignalAddress.item,
- rcf->callSignalAddress.count);
- if (ret < 0)
- return -1;
- }
-
- if (rcf->options & eRegistrationConfirm_timeToLive) {
- DEBUGP("ip_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
- info->timeout = rcf->timeToLive;
- }
-
- if (info->timeout > 0) {
- DEBUGP
- ("ip_ct_ras: set RAS connection timeout to %u seconds\n",
- info->timeout);
- ip_ct_refresh(ct, *pskb, info->timeout * HZ);
-
- /* Set expect timeout */
- read_lock_bh(&ip_conntrack_lock);
- exp = find_expect(ct, ct->tuplehash[dir].tuple.dst.ip,
- info->sig_port[!dir]);
- if (exp) {
- DEBUGP("ip_ct_ras: set Q.931 expect "
- "(%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu) "
- "timeout to %u seconds\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port),
- info->timeout);
- set_expect_timeout(exp, info->timeout);
- }
- read_unlock_bh(&ip_conntrack_lock);
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, UnregistrationRequest * urq)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int ret;
- typeof(set_sig_addr_hook) set_sig_addr;
-
- DEBUGP("ip_ct_ras: URQ\n");
-
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
- urq->callSignalAddress.item,
- urq->callSignalAddress.count);
- if (ret < 0)
- return -1;
- }
-
- /* Clear old expect */
- ip_ct_remove_expectations(ct);
- info->sig_port[dir] = 0;
- info->sig_port[!dir] = 0;
-
- /* Give it 30 seconds for UCF or URJ */
- ip_ct_refresh(ct, *pskb, 30 * HZ);
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, AdmissionRequest * arq)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- __be32 ip;
- u_int16_t port;
- typeof(set_h225_addr_hook) set_h225_addr;
-
- DEBUGP("ip_ct_ras: ARQ\n");
-
- set_h225_addr = rcu_dereference(set_h225_addr_hook);
- if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
- get_h225_addr(*data, &arq->destCallSignalAddress, &ip, &port) &&
- ip == ct->tuplehash[dir].tuple.src.ip &&
- port == info->sig_port[dir] && set_h225_addr) {
- /* Answering ARQ */
- return set_h225_addr(pskb, data, 0,
- &arq->destCallSignalAddress,
- ct->tuplehash[!dir].tuple.dst.ip,
- info->sig_port[!dir]);
- }
-
- if ((arq->options & eAdmissionRequest_srcCallSignalAddress) &&
- get_h225_addr(*data, &arq->srcCallSignalAddress, &ip, &port) &&
- ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr) {
- /* Calling ARQ */
- return set_h225_addr(pskb, data, 0,
- &arq->srcCallSignalAddress,
- ct->tuplehash[!dir].tuple.dst.ip,
- port);
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, AdmissionConfirm * acf)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp;
- typeof(set_sig_addr_hook) set_sig_addr;
-
- DEBUGP("ip_ct_ras: ACF\n");
-
- if (!get_h225_addr(*data, &acf->destCallSignalAddress, &ip, &port))
- return 0;
-
- if (ip == ct->tuplehash[dir].tuple.dst.ip) { /* Answering ACF */
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr)
- return set_sig_addr(pskb, ct, ctinfo, data,
- &acf->destCallSignalAddress, 1);
- return 0;
- }
-
- /* Need new expect */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = IP_CT_EXPECT_PERMANENT;
- exp->expectfn = ip_conntrack_q931_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_ras: expect Q.931 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_lrq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, LocationRequest * lrq)
-{
- typeof(set_ras_addr_hook) set_ras_addr;
-
- DEBUGP("ip_ct_ras: LRQ\n");
-
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr)
- return set_ras_addr(pskb, ct, ctinfo, data,
- &lrq->replyAddress, 1);
- return 0;
-}
-
-/****************************************************************************/
-static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, LocationConfirm * lcf)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp = NULL;
-
- DEBUGP("ip_ct_ras: LCF\n");
-
- if (!get_h225_addr(*data, &lcf->callSignalAddress, &ip, &port))
- return 0;
-
- /* Need new expect for call signal */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = IP_CT_EXPECT_PERMANENT;
- exp->expectfn = ip_conntrack_q931_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_ras: expect Q.931 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
-
- ip_conntrack_expect_put(exp);
-
- /* Ignore rasAddress */
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_irr(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, InfoRequestResponse * irr)
-{
- int ret;
- typeof(set_ras_addr_hook) set_ras_addr;
- typeof(set_sig_addr_hook) set_sig_addr;
-
- DEBUGP("ip_ct_ras: IRR\n");
-
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr) {
- ret = set_ras_addr(pskb, ct, ctinfo, data,
- &irr->rasAddress, 1);
- if (ret < 0)
- return -1;
- }
-
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
- irr->callSignalAddress.item,
- irr->callSignalAddress.count);
- if (ret < 0)
- return -1;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_ras(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, RasMessage * ras)
-{
- switch (ras->choice) {
- case eRasMessage_gatekeeperRequest:
- return process_grq(pskb, ct, ctinfo, data,
- &ras->gatekeeperRequest);
- case eRasMessage_gatekeeperConfirm:
- return process_gcf(pskb, ct, ctinfo, data,
- &ras->gatekeeperConfirm);
- case eRasMessage_registrationRequest:
- return process_rrq(pskb, ct, ctinfo, data,
- &ras->registrationRequest);
- case eRasMessage_registrationConfirm:
- return process_rcf(pskb, ct, ctinfo, data,
- &ras->registrationConfirm);
- case eRasMessage_unregistrationRequest:
- return process_urq(pskb, ct, ctinfo, data,
- &ras->unregistrationRequest);
- case eRasMessage_admissionRequest:
- return process_arq(pskb, ct, ctinfo, data,
- &ras->admissionRequest);
- case eRasMessage_admissionConfirm:
- return process_acf(pskb, ct, ctinfo, data,
- &ras->admissionConfirm);
- case eRasMessage_locationRequest:
- return process_lrq(pskb, ct, ctinfo, data,
- &ras->locationRequest);
- case eRasMessage_locationConfirm:
- return process_lcf(pskb, ct, ctinfo, data,
- &ras->locationConfirm);
- case eRasMessage_infoRequestResponse:
- return process_irr(pskb, ct, ctinfo, data,
- &ras->infoRequestResponse);
- default:
- DEBUGP("ip_ct_ras: RAS message %d\n", ras->choice);
- break;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int ras_help(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- static RasMessage ras;
- unsigned char *data;
- int datalen = 0;
- int ret;
-
- DEBUGP("ip_ct_ras: skblen = %u\n", (*pskb)->len);
-
- spin_lock_bh(&ip_h323_lock);
-
- /* Get UDP data */
- data = get_udp_data(pskb, &datalen);
- if (data == NULL)
- goto accept;
- DEBUGP("ip_ct_ras: RAS message %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr), datalen);
-
- /* Decode RAS message */
- ret = DecodeRasMessage(data, datalen, &ras);
- if (ret < 0) {
- if (net_ratelimit())
- printk("ip_ct_ras: decoding error: %s\n",
- ret == H323_ERROR_BOUND ?
- "out of bound" : "out of range");
- goto accept;
- }
-
- /* Process RAS message */
- if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
- goto drop;
-
- accept:
- spin_unlock_bh(&ip_h323_lock);
- return NF_ACCEPT;
-
- drop:
- spin_unlock_bh(&ip_h323_lock);
- if (net_ratelimit())
- printk("ip_ct_ras: packet dropped\n");
- return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_ras = {
- .name = "RAS",
- .me = THIS_MODULE,
- .max_expected = 32,
- .timeout = 240,
- .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(RAS_PORT)}}},
- .dst = {.protonum = IPPROTO_UDP}},
- .mask = {.src = {.u = {0xFFFE}},
- .dst = {.protonum = 0xFF}},
- .help = ras_help,
-};
-
-/****************************************************************************/
-static void ip_conntrack_ras_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- write_lock_bh(&ip_conntrack_lock);
- new->helper = &ip_conntrack_helper_ras;
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-/* Not __exit - called from init() */
-static void fini(void)
-{
- ip_conntrack_helper_unregister(&ip_conntrack_helper_ras);
- ip_conntrack_helper_unregister(&ip_conntrack_helper_q931);
- kfree(h323_buffer);
- DEBUGP("ip_ct_h323: fini\n");
-}
-
-/****************************************************************************/
-static int __init init(void)
-{
- int ret;
-
- h323_buffer = kmalloc(65536, GFP_KERNEL);
- if (!h323_buffer)
- return -ENOMEM;
- if ((ret = ip_conntrack_helper_register(&ip_conntrack_helper_q931)) ||
- (ret = ip_conntrack_helper_register(&ip_conntrack_helper_ras))) {
- fini();
- return ret;
- }
- DEBUGP("ip_ct_h323: init success\n");
- return 0;
-}
-
-/****************************************************************************/
-module_init(init);
-module_exit(fini);
-
-EXPORT_SYMBOL_GPL(get_h225_addr);
-EXPORT_SYMBOL_GPL(ip_conntrack_h245_expect);
-EXPORT_SYMBOL_GPL(ip_conntrack_q931_expect);
-EXPORT_SYMBOL_GPL(set_h245_addr_hook);
-EXPORT_SYMBOL_GPL(set_h225_addr_hook);
-EXPORT_SYMBOL_GPL(set_sig_addr_hook);
-EXPORT_SYMBOL_GPL(set_ras_addr_hook);
-EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook);
-EXPORT_SYMBOL_GPL(nat_t120_hook);
-EXPORT_SYMBOL_GPL(nat_h245_hook);
-EXPORT_SYMBOL_GPL(nat_callforwarding_hook);
-EXPORT_SYMBOL_GPL(nat_q931_hook);
-
-MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
-MODULE_DESCRIPTION("H.323 connection tracking helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
deleted file mode 100644
index 2b760c5cf709..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ /dev/null
@@ -1,684 +0,0 @@
-/*
- * ip_conntrack_pptp.c - Version 3.0
- *
- * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft. PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702. Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * Limitations:
- * - We blindly assume that control connections are always
- * established in PNS->PAC direction. This is a violation
- * of RFFC2673
- * - We can only support one single call within each session
- *
- * TODO:
- * - testing of incoming PPTP calls
- *
- * Changes:
- * 2002-02-05 - Version 1.3
- * - Call ip_conntrack_unexpect_related() from
- * pptp_destroy_siblings() to destroy expectations in case
- * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
- * (Philip Craig <philipc@snapgear.com>)
- * - Add Version information at module loadtime
- * 2002-02-10 - Version 1.6
- * - move to C99 style initializers
- * - remove second expectation if first arrives
- * 2004-10-22 - Version 2.0
- * - merge Mandrake's 2.6.x port with recent 2.6.x API changes
- * - fix lots of linear skb assumptions from Mandrake's port
- * 2005-06-10 - Version 2.1
- * - use ip_conntrack_expect_free() instead of kfree() on the
- * expect's (which are from the slab for quite some time)
- * 2005-06-10 - Version 3.0
- * - port helper to post-2.6.11 API changes,
- * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- * 2005-07-30 - Version 3.1
- * - port helper to 2.6.13 API changes
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_CT_PPTP_VERSION "3.1"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
-
-static DEFINE_SPINLOCK(ip_pptp_lock);
-
-int
-(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq);
-
-int
-(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq);
-
-void
-(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
- struct ip_conntrack_expect *expect_reply);
-
-void
-(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp);
-
-#if 0
-/* PptpControlMessageType names */
-const char *pptp_msg_name[] = {
- "UNKNOWN_MESSAGE",
- "START_SESSION_REQUEST",
- "START_SESSION_REPLY",
- "STOP_SESSION_REQUEST",
- "STOP_SESSION_REPLY",
- "ECHO_REQUEST",
- "ECHO_REPLY",
- "OUT_CALL_REQUEST",
- "OUT_CALL_REPLY",
- "IN_CALL_REQUEST",
- "IN_CALL_REPLY",
- "IN_CALL_CONNECT",
- "CALL_CLEAR_REQUEST",
- "CALL_DISCONNECT_NOTIFY",
- "WAN_ERROR_NOTIFY",
- "SET_LINK_INFO"
-};
-EXPORT_SYMBOL(pptp_msg_name);
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define SECS *HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-
-#define PPTP_GRE_TIMEOUT (10 MINS)
-#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
-
-static void pptp_expectfn(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp)
-{
- typeof(ip_nat_pptp_hook_expectfn) ip_nat_pptp_expectfn;
-
- DEBUGP("increasing timeouts\n");
-
- /* increase timeout of GRE data channel conntrack entry */
- ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
- ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
-
- /* Can you see how rusty this code is, compared with the pre-2.6.11
- * one? That's what happened to my shiny newnat of 2002 ;( -HW */
-
- rcu_read_lock();
- ip_nat_pptp_expectfn = rcu_dereference(ip_nat_pptp_hook_expectfn);
- if (!ip_nat_pptp_expectfn) {
- struct ip_conntrack_tuple inv_t;
- struct ip_conntrack_expect *exp_other;
-
- /* obviously this tuple inversion only works until you do NAT */
- invert_tuplepr(&inv_t, &exp->tuple);
- DEBUGP("trying to unexpect other dir: ");
- DUMP_TUPLE(&inv_t);
-
- exp_other = ip_conntrack_expect_find_get(&inv_t);
- if (exp_other) {
- /* delete other expectation. */
- DEBUGP("found\n");
- ip_conntrack_unexpect_related(exp_other);
- ip_conntrack_expect_put(exp_other);
- } else {
- DEBUGP("not found\n");
- }
- } else {
- /* we need more than simple inversion */
- ip_nat_pptp_expectfn(ct, exp);
- }
- rcu_read_unlock();
-}
-
-static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_expect *exp;
-
- DEBUGP("trying to timeout ct or exp for tuple ");
- DUMP_TUPLE(t);
-
- h = ip_conntrack_find_get(t, NULL);
- if (h) {
- struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
- DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
- sibling->proto.gre.timeout = 0;
- sibling->proto.gre.stream_timeout = 0;
- if (del_timer(&sibling->timeout))
- sibling->timeout.function((unsigned long)sibling);
- ip_conntrack_put(sibling);
- return 1;
- } else {
- exp = ip_conntrack_expect_find_get(t);
- if (exp) {
- DEBUGP("unexpect_related of expect %p\n", exp);
- ip_conntrack_unexpect_related(exp);
- ip_conntrack_expect_put(exp);
- return 1;
- }
- }
-
- return 0;
-}
-
-
-/* timeout GRE data connections */
-static void pptp_destroy_siblings(struct ip_conntrack *ct)
-{
- struct ip_conntrack_tuple t;
-
- ip_ct_gre_keymap_destroy(ct);
- /* Since ct->sibling_list has literally rusted away in 2.6.11,
- * we now need another way to find out about our sibling
- * contrack and expects... -HW */
-
- /* try original (pns->pac) tuple */
- memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
- t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
- t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
-
- if (!destroy_sibling_or_exp(&t))
- DEBUGP("failed to timeout original pns->pac ct/exp\n");
-
- /* try reply (pac->pns) tuple */
- memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
- t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
- t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
-
- if (!destroy_sibling_or_exp(&t))
- DEBUGP("failed to timeout reply pac->pns ct/exp\n");
-}
-
-/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
-static inline int
-exp_gre(struct ip_conntrack *ct,
- __be16 callid,
- __be16 peer_callid)
-{
- struct ip_conntrack_expect *exp_orig, *exp_reply;
- int ret = 1;
- typeof(ip_nat_pptp_hook_exp_gre) ip_nat_pptp_exp_gre;
-
- exp_orig = ip_conntrack_expect_alloc(ct);
- if (exp_orig == NULL)
- goto out;
-
- exp_reply = ip_conntrack_expect_alloc(ct);
- if (exp_reply == NULL)
- goto out_put_orig;
-
- /* original direction, PNS->PAC */
- exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- exp_orig->tuple.src.u.gre.key = peer_callid;
- exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- exp_orig->tuple.dst.u.gre.key = callid;
- exp_orig->tuple.dst.protonum = IPPROTO_GRE;
-
- exp_orig->mask.src.ip = htonl(0xffffffff);
- exp_orig->mask.src.u.all = 0;
- exp_orig->mask.dst.u.gre.key = htons(0xffff);
- exp_orig->mask.dst.ip = htonl(0xffffffff);
- exp_orig->mask.dst.protonum = 0xff;
-
- exp_orig->master = ct;
- exp_orig->expectfn = pptp_expectfn;
- exp_orig->flags = 0;
-
- /* both expectations are identical apart from tuple */
- memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
-
- /* reply direction, PAC->PNS */
- exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- exp_reply->tuple.src.u.gre.key = callid;
- exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- exp_reply->tuple.dst.u.gre.key = peer_callid;
- exp_reply->tuple.dst.protonum = IPPROTO_GRE;
-
- ip_nat_pptp_exp_gre = rcu_dereference(ip_nat_pptp_hook_exp_gre);
- if (ip_nat_pptp_exp_gre)
- ip_nat_pptp_exp_gre(exp_orig, exp_reply);
- if (ip_conntrack_expect_related(exp_orig) != 0)
- goto out_put_both;
- if (ip_conntrack_expect_related(exp_reply) != 0)
- goto out_unexpect_orig;
-
- /* Add GRE keymap entries */
- if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
- goto out_unexpect_both;
- if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
- ip_ct_gre_keymap_destroy(ct);
- goto out_unexpect_both;
- }
- ret = 0;
-
-out_put_both:
- ip_conntrack_expect_put(exp_reply);
-out_put_orig:
- ip_conntrack_expect_put(exp_orig);
-out:
- return ret;
-
-out_unexpect_both:
- ip_conntrack_unexpect_related(exp_reply);
-out_unexpect_orig:
- ip_conntrack_unexpect_related(exp_orig);
- goto out_put_both;
-}
-
-static inline int
-pptp_inbound_pkt(struct sk_buff **pskb,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq,
- unsigned int reqlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg;
- __be16 cid = 0, pcid = 0;
- typeof(ip_nat_pptp_hook_inbound) ip_nat_pptp_inbound;
-
- msg = ntohs(ctlh->messageType);
- DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
-
- switch (msg) {
- case PPTP_START_SESSION_REPLY:
- /* server confirms new control session */
- if (info->sstate < PPTP_SESSION_REQUESTED)
- goto invalid;
- if (pptpReq->srep.resultCode == PPTP_START_OK)
- info->sstate = PPTP_SESSION_CONFIRMED;
- else
- info->sstate = PPTP_SESSION_ERROR;
- break;
-
- case PPTP_STOP_SESSION_REPLY:
- /* server confirms end of control session */
- if (info->sstate > PPTP_SESSION_STOPREQ)
- goto invalid;
- if (pptpReq->strep.resultCode == PPTP_STOP_OK)
- info->sstate = PPTP_SESSION_NONE;
- else
- info->sstate = PPTP_SESSION_ERROR;
- break;
-
- case PPTP_OUT_CALL_REPLY:
- /* server accepted call, we now expect GRE frames */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
- if (info->cstate != PPTP_CALL_OUT_REQ &&
- info->cstate != PPTP_CALL_OUT_CONF)
- goto invalid;
-
- cid = pptpReq->ocack.callID;
- pcid = pptpReq->ocack.peersCallID;
- if (info->pns_call_id != pcid)
- goto invalid;
- DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
- ntohs(cid), ntohs(pcid));
-
- if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
- info->cstate = PPTP_CALL_OUT_CONF;
- info->pac_call_id = cid;
- exp_gre(ct, cid, pcid);
- } else
- info->cstate = PPTP_CALL_NONE;
- break;
-
- case PPTP_IN_CALL_REQUEST:
- /* server tells us about incoming call request */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
-
- cid = pptpReq->icreq.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
- info->cstate = PPTP_CALL_IN_REQ;
- info->pac_call_id = cid;
- break;
-
- case PPTP_IN_CALL_CONNECT:
- /* server tells us about incoming call established */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
- if (info->cstate != PPTP_CALL_IN_REP &&
- info->cstate != PPTP_CALL_IN_CONF)
- goto invalid;
-
- pcid = pptpReq->iccon.peersCallID;
- cid = info->pac_call_id;
-
- if (info->pns_call_id != pcid)
- goto invalid;
-
- DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
- info->cstate = PPTP_CALL_IN_CONF;
-
- /* we expect a GRE connection from PAC to PNS */
- exp_gre(ct, cid, pcid);
- break;
-
- case PPTP_CALL_DISCONNECT_NOTIFY:
- /* server confirms disconnect */
- cid = pptpReq->disc.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
- info->cstate = PPTP_CALL_NONE;
-
- /* untrack this call id, unexpect GRE packets */
- pptp_destroy_siblings(ct);
- break;
-
- case PPTP_WAN_ERROR_NOTIFY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* I don't have to explain these ;) */
- break;
- default:
- goto invalid;
- }
-
- ip_nat_pptp_inbound = rcu_dereference(ip_nat_pptp_hook_inbound);
- if (ip_nat_pptp_inbound)
- return ip_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
- return NF_ACCEPT;
-
-invalid:
- DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
- "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
- msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
- ntohs(info->pns_call_id), ntohs(info->pac_call_id));
- return NF_ACCEPT;
-}
-
-static inline int
-pptp_outbound_pkt(struct sk_buff **pskb,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq,
- unsigned int reqlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg;
- __be16 cid = 0, pcid = 0;
- typeof(ip_nat_pptp_hook_outbound) ip_nat_pptp_outbound;
-
- msg = ntohs(ctlh->messageType);
- DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
-
- switch (msg) {
- case PPTP_START_SESSION_REQUEST:
- /* client requests for new control session */
- if (info->sstate != PPTP_SESSION_NONE)
- goto invalid;
- info->sstate = PPTP_SESSION_REQUESTED;
- break;
- case PPTP_STOP_SESSION_REQUEST:
- /* client requests end of control session */
- info->sstate = PPTP_SESSION_STOPREQ;
- break;
-
- case PPTP_OUT_CALL_REQUEST:
- /* client initiating connection to server */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
- info->cstate = PPTP_CALL_OUT_REQ;
- /* track PNS call id */
- cid = pptpReq->ocreq.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
- info->pns_call_id = cid;
- break;
- case PPTP_IN_CALL_REPLY:
- /* client answers incoming call */
- if (info->cstate != PPTP_CALL_IN_REQ &&
- info->cstate != PPTP_CALL_IN_REP)
- goto invalid;
-
- cid = pptpReq->icack.callID;
- pcid = pptpReq->icack.peersCallID;
- if (info->pac_call_id != pcid)
- goto invalid;
- DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
- ntohs(cid), ntohs(pcid));
-
- if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
- /* part two of the three-way handshake */
- info->cstate = PPTP_CALL_IN_REP;
- info->pns_call_id = cid;
- } else
- info->cstate = PPTP_CALL_NONE;
- break;
-
- case PPTP_CALL_CLEAR_REQUEST:
- /* client requests hangup of call */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
- /* FUTURE: iterate over all calls and check if
- * call ID is valid. We don't do this without newnat,
- * because we only know about last call */
- info->cstate = PPTP_CALL_CLEAR_REQ;
- break;
- case PPTP_SET_LINK_INFO:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* I don't have to explain these ;) */
- break;
- default:
- goto invalid;
- }
-
- ip_nat_pptp_outbound = rcu_dereference(ip_nat_pptp_hook_outbound);
- if (ip_nat_pptp_outbound)
- return ip_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
- return NF_ACCEPT;
-
-invalid:
- DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
- "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
- msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
- ntohs(info->pns_call_id), ntohs(info->pac_call_id));
- return NF_ACCEPT;
-}
-
-static const unsigned int pptp_msg_size[] = {
- [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
- [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
- [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
- [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
- [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
- [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
- [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
- [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
- [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
- [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
- [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
- [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
- [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
-};
-
-/* track caller id inside control connection, call expect_related */
-static int
-conntrack_pptp_help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-
-{
- int dir = CTINFO2DIR(ctinfo);
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- struct tcphdr _tcph, *tcph;
- struct pptp_pkt_hdr _pptph, *pptph;
- struct PptpControlHeader _ctlh, *ctlh;
- union pptp_ctrl_union _pptpReq, *pptpReq;
- unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
- unsigned int datalen, reqlen, nexthdr_off;
- int oldsstate, oldcstate;
- int ret;
- u_int16_t msg;
-
- /* don't do any tracking before tcp handshake complete */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
- DEBUGP("ctinfo = %u, skipping\n", ctinfo);
- return NF_ACCEPT;
- }
-
- nexthdr_off = (*pskb)->nh.iph->ihl*4;
- tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
- BUG_ON(!tcph);
- nexthdr_off += tcph->doff * 4;
- datalen = tcplen - tcph->doff * 4;
-
- pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
- if (!pptph) {
- DEBUGP("no full PPTP header, can't track\n");
- return NF_ACCEPT;
- }
- nexthdr_off += sizeof(_pptph);
- datalen -= sizeof(_pptph);
-
- /* if it's not a control message we can't do anything with it */
- if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
- ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
- DEBUGP("not a control packet\n");
- return NF_ACCEPT;
- }
-
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
- if (!ctlh)
- return NF_ACCEPT;
- nexthdr_off += sizeof(_ctlh);
- datalen -= sizeof(_ctlh);
-
- reqlen = datalen;
- msg = ntohs(ctlh->messageType);
- if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
- return NF_ACCEPT;
- if (reqlen > sizeof(*pptpReq))
- reqlen = sizeof(*pptpReq);
-
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
- if (!pptpReq)
- return NF_ACCEPT;
-
- oldsstate = info->sstate;
- oldcstate = info->cstate;
-
- spin_lock_bh(&ip_pptp_lock);
-
- /* FIXME: We just blindly assume that the control connection is always
- * established from PNS->PAC. However, RFC makes no guarantee */
- if (dir == IP_CT_DIR_ORIGINAL)
- /* client -> server (PNS -> PAC) */
- ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
- ctinfo);
- else
- /* server -> client (PAC -> PNS) */
- ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
- ctinfo);
- DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
- oldsstate, info->sstate, oldcstate, info->cstate);
- spin_unlock_bh(&ip_pptp_lock);
-
- return ret;
-}
-
-/* control protocol helper */
-static struct ip_conntrack_helper pptp = {
- .list = { NULL, NULL },
- .name = "pptp",
- .me = THIS_MODULE,
- .max_expected = 2,
- .timeout = 5 * 60,
- .tuple = { .src = { .ip = 0,
- .u = { .tcp = { .port =
- __constant_htons(PPTP_CONTROL_PORT) } }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = IPPROTO_TCP
- }
- },
- .mask = { .src = { .ip = 0,
- .u = { .tcp = { .port = __constant_htons(0xffff) } }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = 0xff
- }
- },
- .help = conntrack_pptp_help,
- .destroy = pptp_destroy_siblings,
-};
-
-extern void ip_ct_proto_gre_fini(void);
-extern int __init ip_ct_proto_gre_init(void);
-
-/* ip_conntrack_pptp initialization */
-static int __init ip_conntrack_helper_pptp_init(void)
-{
- int retcode;
-
- retcode = ip_ct_proto_gre_init();
- if (retcode < 0)
- return retcode;
-
- DEBUGP(" registering helper\n");
- if ((retcode = ip_conntrack_helper_register(&pptp))) {
- printk(KERN_ERR "Unable to register conntrack application "
- "helper for pptp: %d\n", retcode);
- ip_ct_proto_gre_fini();
- return retcode;
- }
-
- printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
- return 0;
-}
-
-static void __exit ip_conntrack_helper_pptp_fini(void)
-{
- ip_conntrack_helper_unregister(&pptp);
- ip_ct_proto_gre_fini();
- printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
-}
-
-module_init(ip_conntrack_helper_pptp_init);
-module_exit(ip_conntrack_helper_pptp_fini);
-
-EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
-EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
deleted file mode 100644
index 053e591f407a..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* IRC extension for IP connection tracking, Version 1.21
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
- * based on RR's ip_conntrack_ftp.c
- *
- * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- **
- * Module load syntax:
- * insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS>
- * max_dcc_channels=n dcc_timeout=secs
- *
- * please give the ports of all IRC servers You wish to connect to.
- * If You don't specify ports, the default will be port 6667.
- * With max_dcc_channels you can define the maximum number of not
- * yet answered DCC channels per IRC session (default 8).
- * With dcc_timeout you can specify how long the system waits for
- * an expected DCC channel (default 300 seconds).
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/moduleparam.h>
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-static unsigned int max_dcc_channels = 8;
-static unsigned int dcc_timeout = 300;
-/* This is slow, but it's simple. --RR */
-static char *irc_buffer;
-static DEFINE_SPINLOCK(irc_buffer_lock);
-
-unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
-MODULE_LICENSE("GPL");
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-module_param(max_dcc_channels, uint, 0400);
-MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
-module_param(dcc_timeout, uint, 0400);
-MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
-
-static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
-#define MINMATCHLEN 5
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
- __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
- u_int16_t *port, char **ad_beg_p, char **ad_end_p)
-/* tries to get the ip_addr and port out of a dcc command
- return value: -1 on failure, 0 on success
- data pointer to first byte of DCC command data
- data_end pointer to last byte of dcc command data
- ip returns parsed ip of dcc command
- port returns parsed port of dcc command
- ad_beg_p returns pointer to first byte of addr data
- ad_end_p returns pointer to last byte of addr data */
-{
-
- /* at least 12: "AAAAAAAA P\1\n" */
- while (*data++ != ' ')
- if (data > data_end - 12)
- return -1;
-
- *ad_beg_p = data;
- *ip = simple_strtoul(data, &data, 10);
-
- /* skip blanks between ip and port */
- while (*data == ' ') {
- if (data >= data_end)
- return -1;
- data++;
- }
-
- *port = simple_strtoul(data, &data, 10);
- *ad_end_p = data;
-
- return 0;
-}
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- unsigned int dataoff;
- struct tcphdr _tcph, *th;
- char *data, *data_limit, *ib_ptr;
- int dir = CTINFO2DIR(ctinfo);
- struct ip_conntrack_expect *exp;
- u32 seq;
- u_int32_t dcc_ip;
- u_int16_t dcc_port;
- int i, ret = NF_ACCEPT;
- char *addr_beg_p, *addr_end_p;
- typeof(ip_nat_irc_hook) ip_nat_irc;
-
- DEBUGP("entered\n");
-
- /* If packet is coming from IRC server */
- if (dir == IP_CT_DIR_REPLY)
- return NF_ACCEPT;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
- DEBUGP("Conntrackinfo = %u\n", ctinfo);
- return NF_ACCEPT;
- }
-
- /* Not a full tcp header? */
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return NF_ACCEPT;
-
- /* No data? */
- dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
- if (dataoff >= (*pskb)->len)
- return NF_ACCEPT;
-
- spin_lock_bh(&irc_buffer_lock);
- ib_ptr = skb_header_pointer(*pskb, dataoff,
- (*pskb)->len - dataoff, irc_buffer);
- BUG_ON(ib_ptr == NULL);
-
- data = ib_ptr;
- data_limit = ib_ptr + (*pskb)->len - dataoff;
-
- /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
- * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
- while (data < (data_limit - (19 + MINMATCHLEN))) {
- if (memcmp(data, "\1DCC ", 5)) {
- data++;
- continue;
- }
-
- data += 5;
- /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
-
- DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
- NIPQUAD(iph->saddr), ntohs(th->source),
- NIPQUAD(iph->daddr), ntohs(th->dest));
-
- for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
- if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
- /* no match */
- continue;
- }
-
- DEBUGP("DCC %s detected\n", dccprotos[i]);
- data += strlen(dccprotos[i]);
- /* we have at least
- * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
- * data left (== 14/13 bytes) */
- if (parse_dcc((char *)data, data_limit, &dcc_ip,
- &dcc_port, &addr_beg_p, &addr_end_p)) {
- /* unable to parse */
- DEBUGP("unable to parse dcc command\n");
- continue;
- }
- DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
- HIPQUAD(dcc_ip), dcc_port);
-
- /* dcc_ip can be the internal OR external (NAT'ed) IP
- * Tiago Sousa <mirage@kaotik.org> */
- if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip)
- && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) {
- if (net_ratelimit())
- printk(KERN_WARNING
- "Forged DCC command from "
- "%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
- NIPQUAD(ct->tuplehash[dir].tuple.src.ip),
- HIPQUAD(dcc_ip), dcc_port);
-
- continue;
- }
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL) {
- ret = NF_DROP;
- goto out;
- }
-
- /* save position of address in dcc string,
- * necessary for NAT */
- DEBUGP("tcph->seq = %u\n", th->seq);
- seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
-
- /* We refer to the reverse direction ("!dir")
- * tuples here, because we're expecting
- * something in the other * direction.
- * Doesn't matter unless NAT is happening. */
- exp->tuple = ((struct ip_conntrack_tuple)
- { { 0, { 0 } },
- { ct->tuplehash[!dir].tuple.dst.ip,
- { .tcp = { htons(dcc_port) } },
- IPPROTO_TCP }});
- exp->mask = ((struct ip_conntrack_tuple)
- { { 0, { 0 } },
- { htonl(0xFFFFFFFF),
- { .tcp = { htons(0xFFFF) } }, 0xFF }});
- exp->expectfn = NULL;
- exp->flags = 0;
- ip_nat_irc = rcu_dereference(ip_nat_irc_hook);
- if (ip_nat_irc)
- ret = ip_nat_irc(pskb, ctinfo,
- addr_beg_p - ib_ptr,
- addr_end_p - addr_beg_p,
- exp);
- else if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- ip_conntrack_expect_put(exp);
- goto out;
- } /* for .. NUM_DCCPROTO */
- } /* while data < ... */
-
- out:
- spin_unlock_bh(&irc_buffer_lock);
- return ret;
-}
-
-static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
-static char irc_names[MAX_PORTS][sizeof("irc-65535")];
-
-static void ip_conntrack_irc_fini(void);
-
-static int __init ip_conntrack_irc_init(void)
-{
- int i, ret;
- struct ip_conntrack_helper *hlpr;
- char *tmpname;
-
- if (max_dcc_channels < 1) {
- printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
- return -EBUSY;
- }
-
- irc_buffer = kmalloc(65536, GFP_KERNEL);
- if (!irc_buffer)
- return -ENOMEM;
-
- /* If no port given, default to standard irc port */
- if (ports_c == 0)
- ports[ports_c++] = IRC_PORT;
-
- for (i = 0; i < ports_c; i++) {
- hlpr = &irc_helpers[i];
- hlpr->tuple.src.u.tcp.port = htons(ports[i]);
- hlpr->tuple.dst.protonum = IPPROTO_TCP;
- hlpr->mask.src.u.tcp.port = htons(0xFFFF);
- hlpr->mask.dst.protonum = 0xFF;
- hlpr->max_expected = max_dcc_channels;
- hlpr->timeout = dcc_timeout;
- hlpr->me = THIS_MODULE;
- hlpr->help = help;
-
- tmpname = &irc_names[i][0];
- if (ports[i] == IRC_PORT)
- sprintf(tmpname, "irc");
- else
- sprintf(tmpname, "irc-%d", i);
- hlpr->name = tmpname;
-
- DEBUGP("port #%d: %d\n", i, ports[i]);
-
- ret = ip_conntrack_helper_register(hlpr);
-
- if (ret) {
- printk("ip_conntrack_irc: ERROR registering port %d\n",
- ports[i]);
- ip_conntrack_irc_fini();
- return -EBUSY;
- }
- }
- return 0;
-}
-
-/* This function is intentionally _NOT_ defined as __exit, because
- * it is needed by the init function */
-static void ip_conntrack_irc_fini(void)
-{
- int i;
- for (i = 0; i < ports_c; i++) {
- DEBUGP("unregistering port %d\n",
- ports[i]);
- ip_conntrack_helper_unregister(&irc_helpers[i]);
- }
- kfree(irc_buffer);
-}
-
-module_init(ip_conntrack_irc_init);
-module_exit(ip_conntrack_irc_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
deleted file mode 100644
index cc6dd49c9da0..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * NetBIOS name service broadcast connection tracking helper
- *
- * (c) 2005 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-/*
- * This helper tracks locally originating NetBIOS name service
- * requests by issuing permanent expectations (valid until
- * timing out) matching all reply connections from the
- * destination network. The only NetBIOS specific thing is
- * actually the port number.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_addr.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#define NMBD_PORT 137
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int timeout = 3;
-module_param(timeout, uint, 0400);
-MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- struct ip_conntrack_expect *exp;
- struct iphdr *iph = (*pskb)->nh.iph;
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
- struct in_device *in_dev;
- __be32 mask = 0;
-
- /* we're only interested in locally generated packets */
- if ((*pskb)->sk == NULL)
- goto out;
- if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
- goto out;
- if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
- goto out;
-
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(rt->u.dst.dev);
- if (in_dev != NULL) {
- for_primary_ifa(in_dev) {
- if (ifa->ifa_broadcast == iph->daddr) {
- mask = ifa->ifa_mask;
- break;
- }
- } endfor_ifa(in_dev);
- }
- rcu_read_unlock();
-
- if (mask == 0)
- goto out;
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL)
- goto out;
-
- exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->tuple.src.u.udp.port = htons(NMBD_PORT);
-
- exp->mask.src.ip = mask;
- exp->mask.src.u.udp.port = htons(0xFFFF);
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.udp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
-
- exp->expectfn = NULL;
- exp->flags = IP_CT_EXPECT_PERMANENT;
-
- ip_conntrack_expect_related(exp);
- ip_conntrack_expect_put(exp);
-
- ip_ct_refresh(ct, *pskb, timeout * HZ);
-out:
- return NF_ACCEPT;
-}
-
-static struct ip_conntrack_helper helper = {
- .name = "netbios-ns",
- .tuple = {
- .src = {
- .u = {
- .udp = {
- .port = __constant_htons(NMBD_PORT),
- }
- }
- },
- .dst = {
- .protonum = IPPROTO_UDP,
- },
- },
- .mask = {
- .src = {
- .u = {
- .udp = {
- .port = __constant_htons(0xFFFF),
- }
- }
- },
- .dst = {
- .protonum = 0xFF,
- },
- },
- .max_expected = 1,
- .me = THIS_MODULE,
- .help = help,
-};
-
-static int __init ip_conntrack_netbios_ns_init(void)
-{
- helper.timeout = timeout;
- return ip_conntrack_helper_register(&helper);
-}
-
-static void __exit ip_conntrack_netbios_ns_fini(void)
-{
- ip_conntrack_helper_unregister(&helper);
-}
-
-module_init(ip_conntrack_netbios_ns_init);
-module_exit(ip_conntrack_netbios_ns_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
deleted file mode 100644
index 9228b76ccd9a..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ /dev/null
@@ -1,1577 +0,0 @@
-/* Connection tracking via netlink socket. Allows for user space
- * protocol helpers and general trouble making from userspace.
- *
- * (C) 2001 by Jay Schulist <jschlst@samba.org>
- * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
- *
- * I've reworked this stuff to use attributes instead of conntrack
- * structures. 5.44 am. I need more tea. --pablo 05/07/11.
- *
- * Initial connection tracking via netlink development funded and
- * generally made possible by Network Robots, Inc. (www.networkrobots.com)
- *
- * Further development of this code funded by Astaro AG (http://www.astaro.com)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-MODULE_LICENSE("GPL");
-
-static char __initdata version[] = "0.90";
-
-static inline int
-ctnetlink_dump_tuples_proto(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_protocol *proto)
-{
- int ret = 0;
- struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
-
- NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
-
- if (likely(proto->tuple_to_nfattr))
- ret = proto->tuple_to_nfattr(skb, tuple);
-
- NFA_NEST_END(skb, nest_parms);
-
- return ret;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_tuples_ip(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple)
-{
- struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
-
- NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip);
- NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip);
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_tuples(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple)
-{
- int ret;
- struct ip_conntrack_protocol *proto;
-
- ret = ctnetlink_dump_tuples_ip(skb, tuple);
- if (unlikely(ret < 0))
- return ret;
-
- proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
- ret = ctnetlink_dump_tuples_proto(skb, tuple, proto);
- ip_conntrack_proto_put(proto);
-
- return ret;
-}
-
-static inline int
-ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- __be32 status = htonl((u_int32_t) ct->status);
- NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- long timeout_l = ct->timeout.expires - jiffies;
- __be32 timeout;
-
- if (timeout_l < 0)
- timeout = 0;
- else
- timeout = htonl(timeout_l / HZ);
-
- NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
- struct nfattr *nest_proto;
- int ret;
-
- if (!proto->to_nfattr) {
- ip_conntrack_proto_put(proto);
- return 0;
- }
-
- nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
-
- ret = proto->to_nfattr(skb, nest_proto, ct);
-
- ip_conntrack_proto_put(proto);
-
- NFA_NEST_END(skb, nest_proto);
-
- return ret;
-
-nfattr_failure:
- ip_conntrack_proto_put(proto);
- return -1;
-}
-
-static inline int
-ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- struct nfattr *nest_helper;
-
- if (!ct->helper)
- return 0;
-
- nest_helper = NFA_NEST(skb, CTA_HELP);
- NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
-
- if (ct->helper->to_nfattr)
- ct->helper->to_nfattr(skb, ct);
-
- NFA_NEST_END(skb, nest_helper);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static inline int
-ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
- enum ip_conntrack_dir dir)
-{
- enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
- struct nfattr *nest_count = NFA_NEST(skb, type);
- __be32 tmp;
-
- tmp = htonl(ct->counters[dir].packets);
- NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp);
-
- tmp = htonl(ct->counters[dir].bytes);
- NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp);
-
- NFA_NEST_END(skb, nest_count);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-#else
-#define ctnetlink_dump_counters(a, b, c) (0)
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-static inline int
-ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- __be32 mark = htonl(ct->mark);
-
- NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-#else
-#define ctnetlink_dump_mark(a, b) (0)
-#endif
-
-static inline int
-ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- __be32 id = htonl(ct->id);
- NFA_PUT(skb, CTA_ID, sizeof(__be32), &id);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- __be32 use = htonl(atomic_read(&ct->ct_general.use));
-
- NFA_PUT(skb, CTA_USE, sizeof(__be32), &use);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
-
-static int
-ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event, int nowait,
- const struct ip_conntrack *ct)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- struct nfattr *nest_parms;
- unsigned char *b;
-
- b = skb->tail;
-
- event |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- if (ctnetlink_dump_status(skb, ct) < 0 ||
- ctnetlink_dump_timeout(skb, ct) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
- ctnetlink_dump_protoinfo(skb, ct) < 0 ||
- ctnetlink_dump_helpinfo(skb, ct) < 0 ||
- ctnetlink_dump_mark(skb, ct) < 0 ||
- ctnetlink_dump_id(skb, ct) < 0 ||
- ctnetlink_dump_use(skb, ct) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_conntrack_event(struct notifier_block *this,
- unsigned long events, void *ptr)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- struct nfattr *nest_parms;
- struct ip_conntrack *ct = (struct ip_conntrack *)ptr;
- struct sk_buff *skb;
- unsigned int type;
- unsigned char *b;
- unsigned int flags = 0, group;
-
- /* ignore our fake conntrack entry */
- if (ct == &ip_conntrack_untracked)
- return NOTIFY_DONE;
-
- if (events & IPCT_DESTROY) {
- type = IPCTNL_MSG_CT_DELETE;
- group = NFNLGRP_CONNTRACK_DESTROY;
- } else if (events & (IPCT_NEW | IPCT_RELATED)) {
- type = IPCTNL_MSG_CT_NEW;
- flags = NLM_F_CREATE|NLM_F_EXCL;
- group = NFNLGRP_CONNTRACK_NEW;
- } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
- type = IPCTNL_MSG_CT_NEW;
- group = NFNLGRP_CONNTRACK_UPDATE;
- } else
- return NOTIFY_DONE;
-
- if (!nfnetlink_has_listeners(group))
- return NOTIFY_DONE;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
- if (!skb)
- return NOTIFY_DONE;
-
- b = skb->tail;
-
- type |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = flags;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- if (events & IPCT_DESTROY) {
- if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
- goto nfattr_failure;
- } else {
- if (ctnetlink_dump_status(skb, ct) < 0)
- goto nfattr_failure;
-
- if (ctnetlink_dump_timeout(skb, ct) < 0)
- goto nfattr_failure;
-
- if (events & IPCT_PROTOINFO
- && ctnetlink_dump_protoinfo(skb, ct) < 0)
- goto nfattr_failure;
-
- if ((events & IPCT_HELPER || ct->helper)
- && ctnetlink_dump_helpinfo(skb, ct) < 0)
- goto nfattr_failure;
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
- if ((events & IPCT_MARK || ct->mark)
- && ctnetlink_dump_mark(skb, ct) < 0)
- goto nfattr_failure;
-#endif
-
- if (events & IPCT_COUNTER_FILLING &&
- (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
- goto nfattr_failure;
- }
-
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, 0, group, 0);
- return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
- kfree_skb(skb);
- return NOTIFY_DONE;
-}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-static int ctnetlink_done(struct netlink_callback *cb)
-{
- if (cb->args[1])
- ip_conntrack_put((struct ip_conntrack *)cb->args[1]);
- return 0;
-}
-
-static int
-ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ip_conntrack *ct, *last;
- struct ip_conntrack_tuple_hash *h;
- struct list_head *i;
-
- read_lock_bh(&ip_conntrack_lock);
- last = (struct ip_conntrack *)cb->args[1];
- for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
-restart:
- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
- h = (struct ip_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
- ct = tuplehash_to_ctrack(h);
- if (cb->args[1]) {
- if (ct != last)
- continue;
- cb->args[1] = 0;
- }
- if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW,
- 1, ct) < 0) {
- nf_conntrack_get(&ct->ct_general);
- cb->args[1] = (unsigned long)ct;
- goto out;
- }
-#ifdef CONFIG_NF_CT_ACCT
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
- IPCTNL_MSG_CT_GET_CTRZERO)
- memset(&ct->counters, 0, sizeof(ct->counters));
-#endif
- }
- if (cb->args[1]) {
- cb->args[1] = 0;
- goto restart;
- }
- }
-out:
- read_unlock_bh(&ip_conntrack_lock);
- if (last)
- ip_conntrack_put(last);
-
- return skb->len;
-}
-
-static const size_t cta_min_ip[CTA_IP_MAX] = {
- [CTA_IP_V4_SRC-1] = sizeof(__be32),
- [CTA_IP_V4_DST-1] = sizeof(__be32),
-};
-
-static inline int
-ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
-{
- struct nfattr *tb[CTA_IP_MAX];
-
- nfattr_parse_nested(tb, CTA_IP_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
- return -EINVAL;
-
- if (!tb[CTA_IP_V4_SRC-1])
- return -EINVAL;
- tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
-
- if (!tb[CTA_IP_V4_DST-1])
- return -EINVAL;
- tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
-
- return 0;
-}
-
-static const size_t cta_min_proto[CTA_PROTO_MAX] = {
- [CTA_PROTO_NUM-1] = sizeof(u_int8_t),
- [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
- [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t),
- [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
- [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
- [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t),
-};
-
-static inline int
-ctnetlink_parse_tuple_proto(struct nfattr *attr,
- struct ip_conntrack_tuple *tuple)
-{
- struct nfattr *tb[CTA_PROTO_MAX];
- struct ip_conntrack_protocol *proto;
- int ret = 0;
-
- nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
- return -EINVAL;
-
- if (!tb[CTA_PROTO_NUM-1])
- return -EINVAL;
- tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
-
- proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-
- if (likely(proto->nfattr_to_tuple))
- ret = proto->nfattr_to_tuple(tb, tuple);
-
- ip_conntrack_proto_put(proto);
-
- return ret;
-}
-
-static inline int
-ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
- enum ctattr_tuple type)
-{
- struct nfattr *tb[CTA_TUPLE_MAX];
- int err;
-
- memset(tuple, 0, sizeof(*tuple));
-
- nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
-
- if (!tb[CTA_TUPLE_IP-1])
- return -EINVAL;
-
- err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
- if (err < 0)
- return err;
-
- if (!tb[CTA_TUPLE_PROTO-1])
- return -EINVAL;
-
- err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
- if (err < 0)
- return err;
-
- /* orig and expect tuples get DIR_ORIGINAL */
- if (type == CTA_TUPLE_REPLY)
- tuple->dst.dir = IP_CT_DIR_REPLY;
- else
- tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
- return 0;
-}
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
- [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t),
- [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t),
-};
-
-static int ctnetlink_parse_nat_proto(struct nfattr *attr,
- const struct ip_conntrack *ct,
- struct ip_nat_range *range)
-{
- struct nfattr *tb[CTA_PROTONAT_MAX];
- struct ip_nat_protocol *npt;
-
- nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
- return -EINVAL;
-
- npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
- if (!npt->nfattr_to_range) {
- ip_nat_proto_put(npt);
- return 0;
- }
-
- /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
- if (npt->nfattr_to_range(tb, range) > 0)
- range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-
- ip_nat_proto_put(npt);
-
- return 0;
-}
-
-static const size_t cta_min_nat[CTA_NAT_MAX] = {
- [CTA_NAT_MINIP-1] = sizeof(__be32),
- [CTA_NAT_MAXIP-1] = sizeof(__be32),
-};
-
-static inline int
-ctnetlink_parse_nat(struct nfattr *nat,
- const struct ip_conntrack *ct, struct ip_nat_range *range)
-{
- struct nfattr *tb[CTA_NAT_MAX];
- int err;
-
- memset(range, 0, sizeof(*range));
-
- nfattr_parse_nested(tb, CTA_NAT_MAX, nat);
-
- if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
- return -EINVAL;
-
- if (tb[CTA_NAT_MINIP-1])
- range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
-
- if (!tb[CTA_NAT_MAXIP-1])
- range->max_ip = range->min_ip;
- else
- range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
-
- if (range->min_ip)
- range->flags |= IP_NAT_RANGE_MAP_IPS;
-
- if (!tb[CTA_NAT_PROTO-1])
- return 0;
-
- err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
- if (err < 0)
- return err;
-
- return 0;
-}
-#endif
-
-static inline int
-ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
-{
- struct nfattr *tb[CTA_HELP_MAX];
-
- nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
-
- if (!tb[CTA_HELP_NAME-1])
- return -EINVAL;
-
- *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
-
- return 0;
-}
-
-static const size_t cta_min[CTA_MAX] = {
- [CTA_STATUS-1] = sizeof(__be32),
- [CTA_TIMEOUT-1] = sizeof(__be32),
- [CTA_MARK-1] = sizeof(__be32),
- [CTA_USE-1] = sizeof(__be32),
- [CTA_ID-1] = sizeof(__be32)
-};
-
-static int
-ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack *ct;
- int err = 0;
-
- if (nfattr_bad_size(cda, CTA_MAX, cta_min))
- return -EINVAL;
-
- if (cda[CTA_TUPLE_ORIG-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
- else if (cda[CTA_TUPLE_REPLY-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
- else {
- /* Flush the whole table */
- ip_conntrack_flush();
- return 0;
- }
-
- if (err < 0)
- return err;
-
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h)
- return -ENOENT;
-
- ct = tuplehash_to_ctrack(h);
-
- if (cda[CTA_ID-1]) {
- u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1]));
- if (ct->id != id) {
- ip_conntrack_put(ct);
- return -ENOENT;
- }
- }
- if (del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
-
- ip_conntrack_put(ct);
-
- return 0;
-}
-
-static int
-ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack *ct;
- struct sk_buff *skb2 = NULL;
- int err = 0;
-
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct nfgenmsg *msg = NLMSG_DATA(nlh);
- u32 rlen;
-
- if (msg->nfgen_family != AF_INET)
- return -EAFNOSUPPORT;
-
-#ifndef CONFIG_IP_NF_CT_ACCT
- if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
- return -ENOTSUPP;
-#endif
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
-
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
- }
-
- if (nfattr_bad_size(cda, CTA_MAX, cta_min))
- return -EINVAL;
-
- if (cda[CTA_TUPLE_ORIG-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
- else if (cda[CTA_TUPLE_REPLY-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
- else
- return -EINVAL;
-
- if (err < 0)
- return err;
-
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h)
- return -ENOENT;
-
- ct = tuplehash_to_ctrack(h);
-
- err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2) {
- ip_conntrack_put(ct);
- return -ENOMEM;
- }
-
- err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW, 1, ct);
- ip_conntrack_put(ct);
- if (err <= 0)
- goto free;
-
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
- if (err < 0)
- goto out;
-
- return 0;
-
-free:
- kfree_skb(skb2);
-out:
- return err;
-}
-
-static inline int
-ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- unsigned long d;
- unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1]));
- d = ct->status ^ status;
-
- if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
- /* unchangeable */
- return -EINVAL;
-
- if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
- /* SEEN_REPLY bit can only be set */
- return -EINVAL;
-
-
- if (d & IPS_ASSURED && !(status & IPS_ASSURED))
- /* ASSURED bit can only be set */
- return -EINVAL;
-
- if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
-#ifndef CONFIG_IP_NF_NAT_NEEDED
- return -EINVAL;
-#else
- struct ip_nat_range range;
-
- if (cda[CTA_NAT_DST-1]) {
- if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct,
- &range) < 0)
- return -EINVAL;
- if (ip_nat_initialized(ct,
- HOOK2MANIP(NF_IP_PRE_ROUTING)))
- return -EEXIST;
- ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
- }
- if (cda[CTA_NAT_SRC-1]) {
- if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct,
- &range) < 0)
- return -EINVAL;
- if (ip_nat_initialized(ct,
- HOOK2MANIP(NF_IP_POST_ROUTING)))
- return -EEXIST;
- ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
- }
-#endif
- }
-
- /* Be careful here, modifying NAT bits can screw up things,
- * so don't let users modify them directly if they don't pass
- * ip_nat_range. */
- ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
- return 0;
-}
-
-
-static inline int
-ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- struct ip_conntrack_helper *helper;
- char *helpname;
- int err;
-
- /* don't change helper of sibling connections */
- if (ct->master)
- return -EINVAL;
-
- err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
- if (err < 0)
- return err;
-
- helper = __ip_conntrack_helper_find_byname(helpname);
- if (!helper) {
- if (!strcmp(helpname, ""))
- helper = NULL;
- else
- return -EINVAL;
- }
-
- if (ct->helper) {
- if (!helper) {
- /* we had a helper before ... */
- ip_ct_remove_expectations(ct);
- ct->helper = NULL;
- } else {
- /* need to zero data of old helper */
- memset(&ct->help, 0, sizeof(ct->help));
- }
- }
-
- ct->helper = helper;
-
- return 0;
-}
-
-static inline int
-ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
- if (!del_timer(&ct->timeout))
- return -ETIME;
-
- ct->timeout.expires = jiffies + timeout * HZ;
- add_timer(&ct->timeout);
-
- return 0;
-}
-
-static inline int
-ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
- struct ip_conntrack_protocol *proto;
- u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
- int err = 0;
-
- nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
-
- proto = ip_conntrack_proto_find_get(npt);
-
- if (proto->from_nfattr)
- err = proto->from_nfattr(tb, ct);
- ip_conntrack_proto_put(proto);
-
- return err;
-}
-
-static int
-ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- int err;
-
- if (cda[CTA_HELP-1]) {
- err = ctnetlink_change_helper(ct, cda);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_TIMEOUT-1]) {
- err = ctnetlink_change_timeout(ct, cda);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_STATUS-1]) {
- err = ctnetlink_change_status(ct, cda);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_PROTOINFO-1]) {
- err = ctnetlink_change_protoinfo(ct, cda);
- if (err < 0)
- return err;
- }
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
- return 0;
-}
-
-static int
-ctnetlink_create_conntrack(struct nfattr *cda[],
- struct ip_conntrack_tuple *otuple,
- struct ip_conntrack_tuple *rtuple)
-{
- struct ip_conntrack *ct;
- int err = -EINVAL;
-
- ct = ip_conntrack_alloc(otuple, rtuple);
- if (ct == NULL || IS_ERR(ct))
- return -ENOMEM;
-
- if (!cda[CTA_TIMEOUT-1])
- goto err;
- ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
- ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
- ct->status |= IPS_CONFIRMED;
-
- if (cda[CTA_STATUS-1]) {
- err = ctnetlink_change_status(ct, cda);
- if (err < 0)
- goto err;
- }
-
- if (cda[CTA_PROTOINFO-1]) {
- err = ctnetlink_change_protoinfo(ct, cda);
- if (err < 0)
- goto err;
- }
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
- ct->helper = ip_conntrack_helper_find_get(rtuple);
-
- add_timer(&ct->timeout);
- ip_conntrack_hash_insert(ct);
-
- if (ct->helper)
- ip_conntrack_helper_put(ct->helper);
-
- return 0;
-
-err:
- ip_conntrack_free(ct);
- return err;
-}
-
-static int
-ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple otuple, rtuple;
- struct ip_conntrack_tuple_hash *h = NULL;
- int err = 0;
-
- if (nfattr_bad_size(cda, CTA_MAX, cta_min))
- return -EINVAL;
-
- if (cda[CTA_TUPLE_ORIG-1]) {
- err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_TUPLE_REPLY-1]) {
- err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
- if (err < 0)
- return err;
- }
-
- write_lock_bh(&ip_conntrack_lock);
- if (cda[CTA_TUPLE_ORIG-1])
- h = __ip_conntrack_find(&otuple, NULL);
- else if (cda[CTA_TUPLE_REPLY-1])
- h = __ip_conntrack_find(&rtuple, NULL);
-
- if (h == NULL) {
- write_unlock_bh(&ip_conntrack_lock);
- err = -ENOENT;
- if (nlh->nlmsg_flags & NLM_F_CREATE)
- err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
- return err;
- }
- /* implicit 'else' */
-
- /* we only allow nat config for new conntracks */
- if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
- err = -EINVAL;
- goto out_unlock;
- }
-
- /* We manipulate the conntrack inside the global conntrack table lock,
- * so there's no need to increase the refcount */
- err = -EEXIST;
- if (!(nlh->nlmsg_flags & NLM_F_EXCL))
- err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);
-
-out_unlock:
- write_unlock_bh(&ip_conntrack_lock);
- return err;
-}
-
-/***********************************************************************
- * EXPECT
- ***********************************************************************/
-
-static inline int
-ctnetlink_exp_dump_tuple(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple,
- enum ctattr_expect type)
-{
- struct nfattr *nest_parms = NFA_NEST(skb, type);
-
- if (ctnetlink_dump_tuples(skb, tuple) < 0)
- goto nfattr_failure;
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_exp_dump_mask(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *mask)
-{
- int ret;
- struct ip_conntrack_protocol *proto;
- struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK);
-
- ret = ctnetlink_dump_tuples_ip(skb, mask);
- if (unlikely(ret < 0))
- goto nfattr_failure;
-
- proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
- ret = ctnetlink_dump_tuples_proto(skb, mask, proto);
- ip_conntrack_proto_put(proto);
- if (unlikely(ret < 0))
- goto nfattr_failure;
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_exp_dump_expect(struct sk_buff *skb,
- const struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack *master = exp->master;
- __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
- __be32 id = htonl(exp->id);
-
- if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
- goto nfattr_failure;
- if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
- goto nfattr_failure;
- if (ctnetlink_exp_dump_tuple(skb,
- &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- CTA_EXPECT_MASTER) < 0)
- goto nfattr_failure;
-
- NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout);
- NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static int
-ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event,
- int nowait,
- const struct ip_conntrack_expect *exp)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- unsigned char *b;
-
- b = skb->tail;
-
- event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- if (ctnetlink_exp_dump_expect(skb, exp) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
- unsigned long events, void *ptr)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr;
- struct sk_buff *skb;
- unsigned int type;
- unsigned char *b;
- int flags = 0;
-
- if (events & IPEXP_NEW) {
- type = IPCTNL_MSG_EXP_NEW;
- flags = NLM_F_CREATE|NLM_F_EXCL;
- } else
- return NOTIFY_DONE;
-
- if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
- return NOTIFY_DONE;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
- if (!skb)
- return NOTIFY_DONE;
-
- b = skb->tail;
-
- type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = flags;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- if (ctnetlink_exp_dump_expect(skb, exp) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
- return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
- kfree_skb(skb);
- return NOTIFY_DONE;
-}
-#endif
-
-static int
-ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ip_conntrack_expect *exp = NULL;
- struct list_head *i;
- u_int32_t *id = (u_int32_t *) &cb->args[0];
-
- read_lock_bh(&ip_conntrack_lock);
- list_for_each_prev(i, &ip_conntrack_expect_list) {
- exp = (struct ip_conntrack_expect *) i;
- if (exp->id <= *id)
- continue;
- if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_EXP_NEW,
- 1, exp) < 0)
- goto out;
- *id = exp->id;
- }
-out:
- read_unlock_bh(&ip_conntrack_lock);
-
- return skb->len;
-}
-
-static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
- [CTA_EXPECT_TIMEOUT-1] = sizeof(__be32),
- [CTA_EXPECT_ID-1] = sizeof(__be32)
-};
-
-static int
-ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_expect *exp;
- struct sk_buff *skb2;
- int err = 0;
-
- if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
- return -EINVAL;
-
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct nfgenmsg *msg = NLMSG_DATA(nlh);
- u32 rlen;
-
- if (msg->nfgen_family != AF_INET)
- return -EAFNOSUPPORT;
-
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_exp_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
- }
-
- if (cda[CTA_EXPECT_MASTER-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
- else
- return -EINVAL;
-
- if (err < 0)
- return err;
-
- exp = ip_conntrack_expect_find_get(&tuple);
- if (!exp)
- return -ENOENT;
-
- if (cda[CTA_EXPECT_ID-1]) {
- __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
- if (exp->id != ntohl(id)) {
- ip_conntrack_expect_put(exp);
- return -ENOENT;
- }
- }
-
- err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2)
- goto out;
-
- err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
- nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
- 1, exp);
- if (err <= 0)
- goto free;
-
- ip_conntrack_expect_put(exp);
-
- return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-
-free:
- kfree_skb(skb2);
-out:
- ip_conntrack_expect_put(exp);
- return err;
-}
-
-static int
-ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_expect *exp, *tmp;
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_helper *h;
- int err;
-
- if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
- return -EINVAL;
-
- if (cda[CTA_EXPECT_TUPLE-1]) {
- /* delete a single expect by tuple */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
- if (err < 0)
- return err;
-
- /* bump usage count to 2 */
- exp = ip_conntrack_expect_find_get(&tuple);
- if (!exp)
- return -ENOENT;
-
- if (cda[CTA_EXPECT_ID-1]) {
- __be32 id =
- *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
- if (exp->id != ntohl(id)) {
- ip_conntrack_expect_put(exp);
- return -ENOENT;
- }
- }
-
- /* after list removal, usage count == 1 */
- ip_conntrack_unexpect_related(exp);
- /* have to put what we 'get' above.
- * after this line usage count == 0 */
- ip_conntrack_expect_put(exp);
- } else if (cda[CTA_EXPECT_HELP_NAME-1]) {
- char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
-
- /* delete all expectations for this helper */
- write_lock_bh(&ip_conntrack_lock);
- h = __ip_conntrack_helper_find_byname(name);
- if (!h) {
- write_unlock_bh(&ip_conntrack_lock);
- return -EINVAL;
- }
- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
- list) {
- if (exp->master->helper == h
- && del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- write_unlock_bh(&ip_conntrack_lock);
- } else {
- /* This basically means we have to flush everything*/
- write_lock_bh(&ip_conntrack_lock);
- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
- list) {
- if (del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- write_unlock_bh(&ip_conntrack_lock);
- }
-
- return 0;
-}
-static int
-ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
-{
- return -EOPNOTSUPP;
-}
-
-static int
-ctnetlink_create_expect(struct nfattr *cda[])
-{
- struct ip_conntrack_tuple tuple, mask, master_tuple;
- struct ip_conntrack_tuple_hash *h = NULL;
- struct ip_conntrack_expect *exp;
- struct ip_conntrack *ct;
- int err = 0;
-
- /* caller guarantees that those three CTA_EXPECT_* exist */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
- if (err < 0)
- return err;
-
- /* Look for master conntrack of this expectation */
- h = ip_conntrack_find_get(&master_tuple, NULL);
- if (!h)
- return -ENOENT;
- ct = tuplehash_to_ctrack(h);
-
- if (!ct->helper) {
- /* such conntrack hasn't got any helper, abort */
- err = -EINVAL;
- goto out;
- }
-
- exp = ip_conntrack_expect_alloc(ct);
- if (!exp) {
- err = -ENOMEM;
- goto out;
- }
-
- exp->expectfn = NULL;
- exp->flags = 0;
- exp->master = ct;
- memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
- memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));
-
- err = ip_conntrack_expect_related(exp);
- ip_conntrack_expect_put(exp);
-
-out:
- ip_conntrack_put(tuplehash_to_ctrack(h));
- return err;
-}
-
-static int
-ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_expect *exp;
- int err = 0;
-
- if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
- return -EINVAL;
-
- if (!cda[CTA_EXPECT_TUPLE-1]
- || !cda[CTA_EXPECT_MASK-1]
- || !cda[CTA_EXPECT_MASTER-1])
- return -EINVAL;
-
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
- if (err < 0)
- return err;
-
- write_lock_bh(&ip_conntrack_lock);
- exp = __ip_conntrack_expect_find(&tuple);
-
- if (!exp) {
- write_unlock_bh(&ip_conntrack_lock);
- err = -ENOENT;
- if (nlh->nlmsg_flags & NLM_F_CREATE)
- err = ctnetlink_create_expect(cda);
- return err;
- }
-
- err = -EEXIST;
- if (!(nlh->nlmsg_flags & NLM_F_EXCL))
- err = ctnetlink_change_expect(exp, cda);
- write_unlock_bh(&ip_conntrack_lock);
-
- return err;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
- .notifier_call = ctnetlink_conntrack_event,
-};
-
-static struct notifier_block ctnl_notifier_exp = {
- .notifier_call = ctnetlink_expect_event,
-};
-#endif
-
-static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
- [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack,
- .attr_count = CTA_MAX, },
- [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX, },
- [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack,
- .attr_count = CTA_MAX, },
- [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX, },
-};
-
-static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
- [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect,
- .attr_count = CTA_EXPECT_MAX, },
- [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect,
- .attr_count = CTA_EXPECT_MAX, },
- [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect,
- .attr_count = CTA_EXPECT_MAX, },
-};
-
-static struct nfnetlink_subsystem ctnl_subsys = {
- .name = "conntrack",
- .subsys_id = NFNL_SUBSYS_CTNETLINK,
- .cb_count = IPCTNL_MSG_MAX,
- .cb = ctnl_cb,
-};
-
-static struct nfnetlink_subsystem ctnl_exp_subsys = {
- .name = "conntrack_expect",
- .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP,
- .cb_count = IPCTNL_MSG_EXP_MAX,
- .cb = ctnl_exp_cb,
-};
-
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
-
-static int __init ctnetlink_init(void)
-{
- int ret;
-
- printk("ctnetlink v%s: registering with nfnetlink.\n", version);
- ret = nfnetlink_subsys_register(&ctnl_subsys);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register with nfnetlink.\n");
- goto err_out;
- }
-
- ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
- goto err_unreg_subsys;
- }
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
- ret = ip_conntrack_register_notifier(&ctnl_notifier);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register notifier.\n");
- goto err_unreg_exp_subsys;
- }
-
- ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp);
- if (ret < 0) {
- printk("ctnetlink_init: cannot expect register notifier.\n");
- goto err_unreg_notifier;
- }
-#endif
-
- return 0;
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
- ip_conntrack_unregister_notifier(&ctnl_notifier);
-err_unreg_exp_subsys:
- nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-#endif
-err_unreg_subsys:
- nfnetlink_subsys_unregister(&ctnl_subsys);
-err_out:
- return ret;
-}
-
-static void __exit ctnetlink_exit(void)
-{
- printk("ctnetlink: unregistering from nfnetlink.\n");
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
- ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
- ip_conntrack_unregister_notifier(&ctnl_notifier);
-#endif
-
- nfnetlink_subsys_unregister(&ctnl_exp_subsys);
- nfnetlink_subsys_unregister(&ctnl_subsys);
- return;
-}
-
-module_init(ctnetlink_init);
-module_exit(ctnetlink_exit);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
deleted file mode 100644
index 88af82e98658..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
-
-static int generic_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
-
- return 1;
-}
-
-static int generic_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
-
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int generic_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return 0;
-}
-
-/* Print out the private part of the conntrack. */
-static int generic_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *state)
-{
- return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
- return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_generic_protocol =
-{
- .proto = 0,
- .name = "unknown",
- .pkt_to_tuple = generic_pkt_to_tuple,
- .invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
- .print_conntrack = generic_print_conntrack,
- .packet = packet,
- .new = new,
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
deleted file mode 100644
index ac1c49ef36a9..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
- * Connection tracking protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/list.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-
-static DEFINE_RWLOCK(ip_ct_gre_lock);
-
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
-
-/* shamelessly stolen from ip_conntrack_proto_udp.c */
-#define GRE_TIMEOUT (30*HZ)
-#define GRE_STREAM_TIMEOUT (180*HZ)
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
- NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
- NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
-#else
-#define DEBUGP(x, args...)
-#define DUMP_TUPLE_GRE(x)
-#endif
-
-/* GRE KEYMAP HANDLING FUNCTIONS */
-static LIST_HEAD(gre_keymap_list);
-
-static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
- const struct ip_conntrack_tuple *t)
-{
- return ((km->tuple.src.ip == t->src.ip) &&
- (km->tuple.dst.ip == t->dst.ip) &&
- (km->tuple.dst.protonum == t->dst.protonum) &&
- (km->tuple.dst.u.all == t->dst.u.all));
-}
-
-/* look up the source key for a given tuple */
-static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
-{
- struct ip_ct_gre_keymap *km;
- __be16 key = 0;
-
- read_lock_bh(&ip_ct_gre_lock);
- list_for_each_entry(km, &gre_keymap_list, list) {
- if (gre_key_cmpfn(km, t)) {
- key = km->tuple.src.u.gre.key;
- break;
- }
- }
- read_unlock_bh(&ip_ct_gre_lock);
-
- DEBUGP("lookup src key 0x%x up key for ", key);
- DUMP_TUPLE_GRE(t);
-
- return key;
-}
-
-/* add a single keymap entry, associate with specified master ct */
-int
-ip_ct_gre_keymap_add(struct ip_conntrack *ct,
- struct ip_conntrack_tuple *t, int reply)
-{
- struct ip_ct_gre_keymap **exist_km, *km;
-
- if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
- DEBUGP("refusing to add GRE keymap to non-pptp session\n");
- return -1;
- }
-
- if (!reply)
- exist_km = &ct->help.ct_pptp_info.keymap_orig;
- else
- exist_km = &ct->help.ct_pptp_info.keymap_reply;
-
- if (*exist_km) {
- /* check whether it's a retransmission */
- list_for_each_entry(km, &gre_keymap_list, list) {
- if (gre_key_cmpfn(km, t) && km == *exist_km)
- return 0;
- }
- DEBUGP("trying to override keymap_%s for ct %p\n",
- reply? "reply":"orig", ct);
- return -EEXIST;
- }
-
- km = kmalloc(sizeof(*km), GFP_ATOMIC);
- if (!km)
- return -ENOMEM;
-
- memcpy(&km->tuple, t, sizeof(*t));
- *exist_km = km;
-
- DEBUGP("adding new entry %p: ", km);
- DUMP_TUPLE_GRE(&km->tuple);
-
- write_lock_bh(&ip_ct_gre_lock);
- list_add_tail(&km->list, &gre_keymap_list);
- write_unlock_bh(&ip_ct_gre_lock);
-
- return 0;
-}
-
-/* destroy the keymap entries associated with specified master ct */
-void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
-{
- DEBUGP("entering for ct %p\n", ct);
-
- if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
- DEBUGP("refusing to destroy GRE keymap to non-pptp session\n");
- return;
- }
-
- write_lock_bh(&ip_ct_gre_lock);
- if (ct->help.ct_pptp_info.keymap_orig) {
- DEBUGP("removing %p from list\n",
- ct->help.ct_pptp_info.keymap_orig);
- list_del(&ct->help.ct_pptp_info.keymap_orig->list);
- kfree(ct->help.ct_pptp_info.keymap_orig);
- ct->help.ct_pptp_info.keymap_orig = NULL;
- }
- if (ct->help.ct_pptp_info.keymap_reply) {
- DEBUGP("removing %p from list\n",
- ct->help.ct_pptp_info.keymap_reply);
- list_del(&ct->help.ct_pptp_info.keymap_reply->list);
- kfree(ct->help.ct_pptp_info.keymap_reply);
- ct->help.ct_pptp_info.keymap_reply = NULL;
- }
- write_unlock_bh(&ip_ct_gre_lock);
-}
-
-
-/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
-
-/* invert gre part of tuple */
-static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->dst.u.gre.key = orig->src.u.gre.key;
- tuple->src.u.gre.key = orig->dst.u.gre.key;
-
- return 1;
-}
-
-/* gre hdr info to tuple */
-static int gre_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct gre_hdr_pptp _pgrehdr, *pgrehdr;
- __be16 srckey;
- struct gre_hdr _grehdr, *grehdr;
-
- /* first only delinearize old RFC1701 GRE header */
- grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
- if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
- /* try to behave like "ip_conntrack_proto_generic" */
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
- return 1;
- }
-
- /* PPTP header is variable length, only need up to the call_id field */
- pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
- if (!pgrehdr)
- return 1;
-
- if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
- DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
- return 0;
- }
-
- tuple->dst.u.gre.key = pgrehdr->call_id;
- srckey = gre_keymap_lookup(tuple);
- tuple->src.u.gre.key = srckey;
-
- return 1;
-}
-
-/* print gre part of tuple */
-static int gre_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
- ntohs(tuple->src.u.gre.key),
- ntohs(tuple->dst.u.gre.key));
-}
-
-/* print private data for conntrack */
-static int gre_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *ct)
-{
- return seq_printf(s, "timeout=%u, stream_timeout=%u ",
- (ct->proto.gre.timeout / HZ),
- (ct->proto.gre.stream_timeout / HZ));
-}
-
-/* Returns verdict for packet, and may modify conntrack */
-static int gre_packet(struct ip_conntrack *ct,
- const struct sk_buff *skb,
- enum ip_conntrack_info conntrackinfo)
-{
- /* If we've seen traffic both ways, this is a GRE connection.
- * Extend timeout. */
- if (ct->status & IPS_SEEN_REPLY) {
- ip_ct_refresh_acct(ct, conntrackinfo, skb,
- ct->proto.gre.stream_timeout);
- /* Also, more likely to be important, and not a probe. */
- set_bit(IPS_ASSURED_BIT, &ct->status);
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- } else
- ip_ct_refresh_acct(ct, conntrackinfo, skb,
- ct->proto.gre.timeout);
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int gre_new(struct ip_conntrack *ct,
- const struct sk_buff *skb)
-{
- DEBUGP(": ");
- DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
- /* initialize to sane value. Ideally a conntrack helper
- * (e.g. in case of pptp) is increasing them */
- ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
- ct->proto.gre.timeout = GRE_TIMEOUT;
-
- return 1;
-}
-
-/* Called when a conntrack entry has already been removed from the hashes
- * and is about to be deleted from memory */
-static void gre_destroy(struct ip_conntrack *ct)
-{
- struct ip_conntrack *master = ct->master;
- DEBUGP(" entering\n");
-
- if (!master)
- DEBUGP("no master !?!\n");
- else
- ip_ct_gre_keymap_destroy(master);
-}
-
-/* protocol helper struct */
-static struct ip_conntrack_protocol gre = {
- .proto = IPPROTO_GRE,
- .name = "gre",
- .pkt_to_tuple = gre_pkt_to_tuple,
- .invert_tuple = gre_invert_tuple,
- .print_tuple = gre_print_tuple,
- .print_conntrack = gre_print_conntrack,
- .packet = gre_packet,
- .new = gre_new,
- .destroy = gre_destroy,
- .me = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-/* ip_conntrack_proto_gre initialization */
-int __init ip_ct_proto_gre_init(void)
-{
- return ip_conntrack_protocol_register(&gre);
-}
-
-/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
- * init() code on errors.
- */
-void ip_ct_proto_gre_fini(void)
-{
- struct list_head *pos, *n;
-
- /* delete all keymap entries */
- write_lock_bh(&ip_ct_gre_lock);
- list_for_each_safe(pos, n, &gre_keymap_list) {
- DEBUGP("deleting keymap %p at module unload time\n", pos);
- list_del(pos);
- kfree(pos);
- }
- write_unlock_bh(&ip_ct_gre_lock);
-
- ip_conntrack_protocol_unregister(&gre);
-}
-
-EXPORT_SYMBOL(ip_ct_gre_keymap_add);
-EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
deleted file mode 100644
index ad70c81a21e0..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/icmp.h>
-#include <linux/seq_file.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int icmp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct icmphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->dst.u.icmp.type = hp->type;
- tuple->src.u.icmp.id = hp->un.echo.id;
- tuple->dst.u.icmp.code = hp->code;
-
- return 1;
-}
-
-/* Add 1; spaces filled with 0. */
-static const u_int8_t invmap[] = {
- [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
- [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
- [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
- [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
- [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
- [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
- [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
- [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
-};
-
-static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- if (orig->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[orig->dst.u.icmp.type])
- return 0;
-
- tuple->src.u.icmp.id = orig->src.u.icmp.id;
- tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
- tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int icmp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
-}
-
-/* Print out the private part of the conntrack. */
-static int icmp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct ip_conntrack *ct,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- /* Try to delete connection immediately after all replies:
- won't actually vanish as we still have skb, and del_timer
- means this will only run once even if count hits zero twice
- (theoretically possible with SMP) */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count)
- && del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
- } else {
- atomic_inc(&ct->proto.icmp.count);
- ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
- ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
- }
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int icmp_new(struct ip_conntrack *conntrack,
- const struct sk_buff *skb)
-{
- static const u_int8_t valid_new[] = {
- [ICMP_ECHO] = 1,
- [ICMP_TIMESTAMP] = 1,
- [ICMP_INFO_REQUEST] = 1,
- [ICMP_ADDRESS] = 1
- };
-
- if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
- || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
- /* Can't create a new ICMP `conn' with this. */
- DEBUGP("icmp: can't create new conn with type %u\n",
- conntrack->tuplehash[0].tuple.dst.u.icmp.type);
- DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
- return 0;
- }
- atomic_set(&conntrack->proto.icmp.count, 0);
- return 1;
-}
-
-static int
-icmp_error_message(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct ip_conntrack_tuple innertuple, origtuple;
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } _in, *inside;
- struct ip_conntrack_protocol *innerproto;
- struct ip_conntrack_tuple_hash *h;
- int dataoff;
-
- IP_NF_ASSERT(skb->nfct == NULL);
-
- /* Not enough header? */
- inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
- if (inside == NULL)
- return -NF_ACCEPT;
-
- /* Ignore ICMP's containing fragments (shouldn't happen) */
- if (inside->ip.frag_off & htons(IP_OFFSET)) {
- DEBUGP("icmp_error_track: fragment of proto %u\n",
- inside->ip.protocol);
- return -NF_ACCEPT;
- }
-
- innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
- dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
- /* Are they talking about one of our connections? */
- if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
- DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
- ip_conntrack_proto_put(innerproto);
- return -NF_ACCEPT;
- }
-
- /* Ordinarily, we'd expect the inverted tupleproto, but it's
- been preserved inside the ICMP. */
- if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
- DEBUGP("icmp_error_track: Can't invert tuple\n");
- ip_conntrack_proto_put(innerproto);
- return -NF_ACCEPT;
- }
- ip_conntrack_proto_put(innerproto);
-
- *ctinfo = IP_CT_RELATED;
-
- h = ip_conntrack_find_get(&innertuple, NULL);
- if (!h) {
- /* Locally generated ICMPs will match inverted if they
- haven't been SNAT'ed yet */
- /* FIXME: NAT code has to handle half-done double NAT --RR */
- if (hooknum == NF_IP_LOCAL_OUT)
- h = ip_conntrack_find_get(&origtuple, NULL);
-
- if (!h) {
- DEBUGP("icmp_error_track: no match\n");
- return -NF_ACCEPT;
- }
- /* Reverse direction from that found */
- if (DIRECTION(h) != IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
- } else {
- if (DIRECTION(h) == IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
- }
-
- /* Update skb to refer to this connection */
- skb->nfct = &tuplehash_to_ctrack(h)->ct_general;
- skb->nfctinfo = *ctinfo;
- return -NF_ACCEPT;
-}
-
-/* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct icmphdr _ih, *icmph;
-
- /* Not enough header? */
- icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
- if (icmph == NULL) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: short packet ");
- return -NF_ACCEPT;
- }
-
- /* See ip_conntrack_proto_tcp.c */
- if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
- nf_ip_checksum(skb, hooknum, skb->nh.iph->ihl * 4, 0)) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: bad ICMP checksum ");
- return -NF_ACCEPT;
- }
-
- /*
- * 18 is the highest 'known' ICMP type. Anything else is a mystery
- *
- * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
- * discarded.
- */
- if (icmph->type > NR_ICMP_TYPES) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: invalid ICMP type ");
- return -NF_ACCEPT;
- }
-
- /* Need to track icmp error message? */
- if (icmph->type != ICMP_DEST_UNREACH
- && icmph->type != ICMP_SOURCE_QUENCH
- && icmph->type != ICMP_TIME_EXCEEDED
- && icmph->type != ICMP_PARAMETERPROB
- && icmph->type != ICMP_REDIRECT)
- return NF_ACCEPT;
-
- return icmp_error_message(skb, ctinfo, hooknum);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int icmp_tuple_to_nfattr(struct sk_buff *skb,
- const struct ip_conntrack_tuple *t)
-{
- NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16),
- &t->src.u.icmp.id);
- NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
- &t->dst.u.icmp.type);
- NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
- &t->dst.u.icmp.code);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static int icmp_nfattr_to_tuple(struct nfattr *tb[],
- struct ip_conntrack_tuple *tuple)
-{
- if (!tb[CTA_PROTO_ICMP_TYPE-1]
- || !tb[CTA_PROTO_ICMP_CODE-1]
- || !tb[CTA_PROTO_ICMP_ID-1])
- return -EINVAL;
-
- tuple->dst.u.icmp.type =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
- tuple->dst.u.icmp.code =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
- tuple->src.u.icmp.id =
- *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
-
- if (tuple->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[tuple->dst.u.icmp.type])
- return -EINVAL;
-
- return 0;
-}
-#endif
-
-struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
-{
- .proto = IPPROTO_ICMP,
- .name = "icmp",
- .pkt_to_tuple = icmp_pkt_to_tuple,
- .invert_tuple = icmp_invert_tuple,
- .print_tuple = icmp_print_tuple,
- .print_conntrack = icmp_print_conntrack,
- .packet = icmp_packet,
- .new = icmp_new,
- .error = icmp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = icmp_tuple_to_nfattr,
- .nfattr_to_tuple = icmp_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
deleted file mode 100644
index e6942992b2f6..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ /dev/null
@@ -1,659 +0,0 @@
-/*
- * Connection tracking protocol helper module for SCTP.
- *
- * SCTP is defined in RFC 2960. References to various sections in this code
- * are to this RFC.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * Added support for proc manipulation of timeouts.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/interrupt.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/sctp.h>
-#include <linux/string.h>
-#include <linux/seq_file.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.sctp */
-static DEFINE_RWLOCK(sctp_lock);
-
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR
-
- And so for me for SCTP :D -Kiran */
-
-static const char *sctp_conntrack_names[] = {
- "NONE",
- "CLOSED",
- "COOKIE_WAIT",
- "COOKIE_ECHOED",
- "ESTABLISHED",
- "SHUTDOWN_SENT",
- "SHUTDOWN_RECD",
- "SHUTDOWN_ACK_SENT",
-};
-
-#define SECS * HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS * 24 HOURS
-
-static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS;
-static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
-
-static const unsigned int * sctp_timeouts[]
-= { NULL, /* SCTP_CONNTRACK_NONE */
- &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
- &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
- &ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
- &ip_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
- &ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
- &ip_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
- &ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
- };
-
-#define sNO SCTP_CONNTRACK_NONE
-#define sCL SCTP_CONNTRACK_CLOSED
-#define sCW SCTP_CONNTRACK_COOKIE_WAIT
-#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
-#define sES SCTP_CONNTRACK_ESTABLISHED
-#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
-#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
-#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
-#define sIV SCTP_CONNTRACK_MAX
-
-/*
- These are the descriptions of the states:
-
-NOTE: These state names are tantalizingly similar to the states of an
-SCTP endpoint. But the interpretation of the states is a little different,
-considering that these are the states of the connection and not of an end
-point. Please note the subtleties. -Kiran
-
-NONE - Nothing so far.
-COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
- an INIT_ACK chunk in the reply direction.
-COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
-ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
-SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
-SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin.
-SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
- to that of the SHUTDOWN chunk.
-CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
- the SHUTDOWN chunk. Connection is closed.
-*/
-
-/* TODO
- - I have assumed that the first INIT is in the original direction.
- This messes things when an INIT comes in the reply direction in CLOSED
- state.
- - Check the error type in the reply dir before transitioning from
-cookie echoed to closed.
- - Sec 5.2.4 of RFC 2960
- - Multi Homing support.
-*/
-
-/* SCTP conntrack state transitions */
-static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
- {
-/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
- },
- {
-/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
- }
-};
-
-static int sctp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- sctp_sctphdr_t _hdr, *hp;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->src.u.sctp.port = hp->source;
- tuple->dst.u.sctp.port = hp->dest;
- return 1;
-}
-
-static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- tuple->src.u.sctp.port = orig->dst.u.sctp.port;
- tuple->dst.u.sctp.port = orig->src.u.sctp.port;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int sctp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.sctp.port),
- ntohs(tuple->dst.u.sctp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- enum sctp_conntrack state;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- read_lock_bh(&sctp_lock);
- state = conntrack->proto.sctp.state;
- read_unlock_bh(&sctp_lock);
-
- return seq_printf(s, "%s ", sctp_conntrack_names[state]);
-}
-
-#define for_each_sctp_chunk(skb, sch, _sch, offset, count) \
-for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \
- offset < skb->len && \
- (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \
- offset += (ntohs(sch->length) + 3) & ~3, count++)
-
-/* Some validity checks to make sure the chunks are fine */
-static int do_basic_checks(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- char *map)
-{
- u_int32_t offset, count;
- sctp_chunkhdr_t _sch, *sch;
- int flag;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- flag = 0;
-
- for_each_sctp_chunk (skb, sch, _sch, offset, count) {
- DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type);
-
- if (sch->type == SCTP_CID_INIT
- || sch->type == SCTP_CID_INIT_ACK
- || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- flag = 1;
- }
-
- /*
- * Cookie Ack/Echo chunks not the first OR
- * Init / Init Ack / Shutdown compl chunks not the only chunks
- * OR zero-length.
- */
- if (((sch->type == SCTP_CID_COOKIE_ACK
- || sch->type == SCTP_CID_COOKIE_ECHO
- || flag)
- && count !=0) || !sch->length) {
- DEBUGP("Basic checks failed\n");
- return 1;
- }
-
- if (map) {
- set_bit(sch->type, (void *)map);
- }
- }
-
- DEBUGP("Basic checks passed\n");
- return count == 0;
-}
-
-static int new_state(enum ip_conntrack_dir dir,
- enum sctp_conntrack cur_state,
- int chunk_type)
-{
- int i;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- DEBUGP("Chunk type: %d\n", chunk_type);
-
- switch (chunk_type) {
- case SCTP_CID_INIT:
- DEBUGP("SCTP_CID_INIT\n");
- i = 0; break;
- case SCTP_CID_INIT_ACK:
- DEBUGP("SCTP_CID_INIT_ACK\n");
- i = 1; break;
- case SCTP_CID_ABORT:
- DEBUGP("SCTP_CID_ABORT\n");
- i = 2; break;
- case SCTP_CID_SHUTDOWN:
- DEBUGP("SCTP_CID_SHUTDOWN\n");
- i = 3; break;
- case SCTP_CID_SHUTDOWN_ACK:
- DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
- i = 4; break;
- case SCTP_CID_ERROR:
- DEBUGP("SCTP_CID_ERROR\n");
- i = 5; break;
- case SCTP_CID_COOKIE_ECHO:
- DEBUGP("SCTP_CID_COOKIE_ECHO\n");
- i = 6; break;
- case SCTP_CID_COOKIE_ACK:
- DEBUGP("SCTP_CID_COOKIE_ACK\n");
- i = 7; break;
- case SCTP_CID_SHUTDOWN_COMPLETE:
- DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
- i = 8; break;
- default:
- /* Other chunks like DATA, SACK, HEARTBEAT and
- its ACK do not cause a change in state */
- DEBUGP("Unknown chunk type, Will stay in %s\n",
- sctp_conntrack_names[cur_state]);
- return cur_state;
- }
-
- DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
- dir, sctp_conntrack_names[cur_state], chunk_type,
- sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
-
- return sctp_conntracks[dir][i][cur_state];
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int sctp_packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- enum sctp_conntrack newconntrack, oldsctpstate;
- struct iphdr *iph = skb->nh.iph;
- sctp_sctphdr_t _sctph, *sh;
- sctp_chunkhdr_t _sch, *sch;
- u_int32_t offset, count;
- char map[256 / sizeof (char)] = {0};
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
- if (sh == NULL)
- return -1;
-
- if (do_basic_checks(conntrack, skb, map) != 0)
- return -1;
-
- /* Check the verification tag (Sec 8.5) */
- if (!test_bit(SCTP_CID_INIT, (void *)map)
- && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
- && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
- && !test_bit(SCTP_CID_ABORT, (void *)map)
- && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
- && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
- DEBUGP("Verification tag check failed\n");
- return -1;
- }
-
- oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
- for_each_sctp_chunk (skb, sch, _sch, offset, count) {
- write_lock_bh(&sctp_lock);
-
- /* Special cases of Verification tag check (Sec 8.5.1) */
- if (sch->type == SCTP_CID_INIT) {
- /* Sec 8.5.1 (A) */
- if (sh->vtag != 0) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- } else if (sch->type == SCTP_CID_ABORT) {
- /* Sec 8.5.1 (B) */
- if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
- && !(sh->vtag == conntrack->proto.sctp.vtag
- [1 - CTINFO2DIR(ctinfo)])) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- /* Sec 8.5.1 (C) */
- if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
- && !(sh->vtag == conntrack->proto.sctp.vtag
- [1 - CTINFO2DIR(ctinfo)]
- && (sch->flags & 1))) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
- /* Sec 8.5.1 (D) */
- if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- }
-
- oldsctpstate = conntrack->proto.sctp.state;
- newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
-
- /* Invalid */
- if (newconntrack == SCTP_CONNTRACK_MAX) {
- DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
- CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
- write_unlock_bh(&sctp_lock);
- return -1;
- }
-
- /* If it is an INIT or an INIT ACK note down the vtag */
- if (sch->type == SCTP_CID_INIT
- || sch->type == SCTP_CID_INIT_ACK) {
- sctp_inithdr_t _inithdr, *ih;
-
- ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- DEBUGP("Setting vtag %x for dir %d\n",
- ih->init_tag, !CTINFO2DIR(ctinfo));
- conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
- }
-
- conntrack->proto.sctp.state = newconntrack;
- if (oldsctpstate != newconntrack)
- ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
- write_unlock_bh(&sctp_lock);
- }
-
- ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
-
- if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
- && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
- && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
- DEBUGP("Setting assured bit\n");
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- }
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int sctp_new(struct ip_conntrack *conntrack,
- const struct sk_buff *skb)
-{
- enum sctp_conntrack newconntrack;
- struct iphdr *iph = skb->nh.iph;
- sctp_sctphdr_t _sctph, *sh;
- sctp_chunkhdr_t _sch, *sch;
- u_int32_t offset, count;
- char map[256 / sizeof (char)] = {0};
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
- if (sh == NULL)
- return 0;
-
- if (do_basic_checks(conntrack, skb, map) != 0)
- return 0;
-
- /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
- if ((test_bit (SCTP_CID_ABORT, (void *)map))
- || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
- || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
- return 0;
- }
-
- newconntrack = SCTP_CONNTRACK_MAX;
- for_each_sctp_chunk (skb, sch, _sch, offset, count) {
- /* Don't need lock here: this conntrack not in circulation yet */
- newconntrack = new_state (IP_CT_DIR_ORIGINAL,
- SCTP_CONNTRACK_NONE, sch->type);
-
- /* Invalid: delete conntrack */
- if (newconntrack == SCTP_CONNTRACK_MAX) {
- DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
- return 0;
- }
-
- /* Copy the vtag into the state info */
- if (sch->type == SCTP_CID_INIT) {
- if (sh->vtag == 0) {
- sctp_inithdr_t _inithdr, *ih;
-
- ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL)
- return 0;
-
- DEBUGP("Setting vtag %x for new conn\n",
- ih->init_tag);
-
- conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
- ih->init_tag;
- } else {
- /* Sec 8.5.1 (A) */
- return 0;
- }
- }
- /* If it is a shutdown ack OOTB packet, we expect a return
- shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
- else {
- DEBUGP("Setting vtag %x for new conn OOTB\n",
- sh->vtag);
- conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
- }
-
- conntrack->proto.sctp.state = newconntrack;
- }
-
- return 1;
-}
-
-static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
- .proto = IPPROTO_SCTP,
- .name = "sctp",
- .pkt_to_tuple = sctp_pkt_to_tuple,
- .invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
- .print_conntrack = sctp_print_conntrack,
- .packet = sctp_packet,
- .new = sctp_new,
- .destroy = NULL,
- .me = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-#ifdef CONFIG_SYSCTL
-static ctl_table ip_ct_sysctl_table[] = {
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
- .procname = "ip_conntrack_sctp_timeout_closed",
- .data = &ip_ct_sctp_timeout_closed,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
- .procname = "ip_conntrack_sctp_timeout_cookie_wait",
- .data = &ip_ct_sctp_timeout_cookie_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
- .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
- .data = &ip_ct_sctp_timeout_cookie_echoed,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
- .procname = "ip_conntrack_sctp_timeout_established",
- .data = &ip_ct_sctp_timeout_established,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
- .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
- .data = &ip_ct_sctp_timeout_shutdown_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
- .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
- .data = &ip_ct_sctp_timeout_shutdown_recd,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
- .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
- .data = &ip_ct_sctp_timeout_shutdown_ack_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_netfilter_table[] = {
- {
- .ctl_name = NET_IPV4_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = ip_ct_sysctl_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = ip_ct_netfilter_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ip_ct_ipv4_table,
- },
- { .ctl_name = 0 }
-};
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-#endif
-
-static int __init ip_conntrack_proto_sctp_init(void)
-{
- int ret;
-
- ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
- if (ret) {
- printk("ip_conntrack_proto_sctp: protocol register failed\n");
- goto out;
- }
-
-#ifdef CONFIG_SYSCTL
- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
- if (ip_ct_sysctl_header == NULL) {
- ret = -ENOMEM;
- printk("ip_conntrack_proto_sctp: can't register to sysctl.\n");
- goto cleanup;
- }
-#endif
-
- return ret;
-
-#ifdef CONFIG_SYSCTL
- cleanup:
- ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#endif
- out:
- DEBUGP("SCTP conntrack module loading %s\n",
- ret ? "failed": "succeeded");
- return ret;
-}
-
-static void __exit ip_conntrack_proto_sctp_fini(void)
-{
- ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ip_ct_sysctl_header);
-#endif
- DEBUGP("SCTP conntrack module unloaded\n");
-}
-
-module_init(ip_conntrack_proto_sctp_init);
-module_exit(ip_conntrack_proto_sctp_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
deleted file mode 100644
index 0a72eab14620..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ /dev/null
@@ -1,1164 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- * - Real stateful connection tracking
- * - Modified state transitions table
- * - Window scaling support added
- * - SACK support added
- *
- * Willy Tarreau:
- * - State table bugfixes
- * - More robust state changes
- * - Tuning timer parameters
- *
- * version 2.2
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/spinlock.h>
-
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP printk
-#define DEBUGP_VARS
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
-/* "Be conservative in what you do,
- be liberal in what you accept from others."
- If it's non-zero, we mark only out of window RST segments as INVALID. */
-int ip_ct_tcp_be_liberal __read_mostly = 0;
-
-/* If it is set to zero, we disable picking up already established
- connections. */
-int ip_ct_tcp_loose __read_mostly = 1;
-
-/* Max number of the retransmitted packets without receiving an (acceptable)
- ACK from the destination. If this number is reached, a shorter timer
- will be started. */
-int ip_ct_tcp_max_retrans __read_mostly = 3;
-
- /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR */
-
-static const char *tcp_conntrack_names[] = {
- "NONE",
- "SYN_SENT",
- "SYN_RECV",
- "ESTABLISHED",
- "FIN_WAIT",
- "CLOSE_WAIT",
- "LAST_ACK",
- "TIME_WAIT",
- "CLOSE",
- "LISTEN"
-};
-
-#define SECS * HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS * 24 HOURS
-
-unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
-unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
-unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS;
-unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
-unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
-unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS;
-
-/* RFC1122 says the R2 limit should be at least 100 seconds.
- Linux uses 15 packets as limit, which corresponds
- to ~13-30min depending on RTO. */
-unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
-
-static const unsigned int * tcp_timeouts[]
-= { NULL, /* TCP_CONNTRACK_NONE */
- &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
- &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
- &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
- &ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
- &ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
- &ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
- &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
- &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
- NULL, /* TCP_CONNTRACK_LISTEN */
- };
-
-#define sNO TCP_CONNTRACK_NONE
-#define sSS TCP_CONNTRACK_SYN_SENT
-#define sSR TCP_CONNTRACK_SYN_RECV
-#define sES TCP_CONNTRACK_ESTABLISHED
-#define sFW TCP_CONNTRACK_FIN_WAIT
-#define sCW TCP_CONNTRACK_CLOSE_WAIT
-#define sLA TCP_CONNTRACK_LAST_ACK
-#define sTW TCP_CONNTRACK_TIME_WAIT
-#define sCL TCP_CONNTRACK_CLOSE
-#define sLI TCP_CONNTRACK_LISTEN
-#define sIV TCP_CONNTRACK_MAX
-#define sIG TCP_CONNTRACK_IGNORE
-
-/* What TCP flags are set from RST/SYN/FIN/ACK. */
-enum tcp_bit_set {
- TCP_SYN_SET,
- TCP_SYNACK_SET,
- TCP_FIN_SET,
- TCP_ACK_SET,
- TCP_RST_SET,
- TCP_NONE_SET,
-};
-
-/*
- * The TCP state transition table needs a few words...
- *
- * We are the man in the middle. All the packets go through us
- * but might get lost in transit to the destination.
- * It is assumed that the destinations can't receive segments
- * we haven't seen.
- *
- * The checked segment is in window, but our windows are *not*
- * equivalent with the ones of the sender/receiver. We always
- * try to guess the state of the current sender.
- *
- * The meaning of the states are:
- *
- * NONE: initial state
- * SYN_SENT: SYN-only packet seen
- * SYN_RECV: SYN-ACK packet seen
- * ESTABLISHED: ACK packet seen
- * FIN_WAIT: FIN packet seen
- * CLOSE_WAIT: ACK seen (after FIN)
- * LAST_ACK: FIN seen (after FIN)
- * TIME_WAIT: last ACK seen
- * CLOSE: closed connection
- *
- * LISTEN state is not used.
- *
- * Packets marked as IGNORED (sIG):
- * if they may be either invalid or valid
- * and the receiver may send back a connection
- * closing RST or a SYN/ACK.
- *
- * Packets marked as INVALID (sIV):
- * if they are invalid
- * or we do not support the request (simultaneous open)
- */
-static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
- {
-/* ORIGINAL */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
-/*
- * sNO -> sSS Initialize a new connection
- * sSS -> sSS Retransmitted SYN
- * sSR -> sIG Late retransmitted SYN?
- * sES -> sIG Error: SYNs in window outside the SYN_SENT state
- * are errors. Receiver will reply with RST
- * and close the connection.
- * Or we are not in sync and hold a dead connection.
- * sFW -> sIG
- * sCW -> sIG
- * sLA -> sIG
- * sTW -> sSS Reopened connection (RFC 1122).
- * sCL -> sSS
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- * A SYN/ACK from the client is always invalid:
- * - either it tries to set up a simultaneous open, which is
- * not supported;
- * - or the firewall has just been inserted between the two hosts
- * during the session set-up. The SYN will be retransmitted
- * by the true client (or it'll time out).
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- * sNO -> sIV Too late and no reason to do anything...
- * sSS -> sIV Client migth not send FIN in this state:
- * we enforce waiting for a SYN/ACK reply first.
- * sSR -> sFW Close started.
- * sES -> sFW
- * sFW -> sLA FIN seen in both directions, waiting for
- * the last ACK.
- * Migth be a retransmitted FIN as well...
- * sCW -> sLA
- * sLA -> sLA Retransmitted FIN. Remain in the same state.
- * sTW -> sTW
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- * sNO -> sES Assumed.
- * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
- * sSR -> sES Established state is reached.
- * sES -> sES :-)
- * sFW -> sCW Normal close request answered by ACK.
- * sCW -> sCW
- * sLA -> sTW Last ACK detected.
- * sTW -> sTW Retransmitted last ACK. Remain in the same state.
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
- },
- {
-/* REPLY */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- * sNO -> sIV Never reached.
- * sSS -> sIV Simultaneous open, not supported
- * sSR -> sIV Simultaneous open, not supported.
- * sES -> sIV Server may not initiate a connection.
- * sFW -> sIV
- * sCW -> sIV
- * sLA -> sIV
- * sTW -> sIV Reopened connection, but server may not do it.
- * sCL -> sIV
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
-/*
- * sSS -> sSR Standard open.
- * sSR -> sSR Retransmitted SYN/ACK.
- * sES -> sIG Late retransmitted SYN/ACK?
- * sFW -> sIG Might be SYN/ACK answering ignored SYN
- * sCW -> sIG
- * sLA -> sIG
- * sTW -> sIG
- * sCL -> sIG
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- * sSS -> sIV Server might not send FIN in this state.
- * sSR -> sFW Close started.
- * sES -> sFW
- * sFW -> sLA FIN seen in both directions.
- * sCW -> sLA
- * sLA -> sLA Retransmitted FIN.
- * sTW -> sTW
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- * sSS -> sIG Might be a half-open connection.
- * sSR -> sSR Might answer late resent SYN.
- * sES -> sES :-)
- * sFW -> sCW Normal close request answered by ACK.
- * sCW -> sCW
- * sLA -> sTW Last ACK detected.
- * sTW -> sTW Retransmitted last ACK.
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
- }
-};
-
-static int tcp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct tcphdr _hdr, *hp;
-
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->src.u.tcp.port = hp->source;
- tuple->dst.u.tcp.port = hp->dest;
-
- return 1;
-}
-
-static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->src.u.tcp.port = orig->dst.u.tcp.port;
- tuple->dst.u.tcp.port = orig->src.u.tcp.port;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int tcp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.tcp.port),
- ntohs(tuple->dst.u.tcp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- enum tcp_conntrack state;
-
- read_lock_bh(&tcp_lock);
- state = conntrack->proto.tcp.state;
- read_unlock_bh(&tcp_lock);
-
- return seq_printf(s, "%s ", tcp_conntrack_names[state]);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
- const struct ip_conntrack *ct)
-{
- struct nfattr *nest_parms;
-
- read_lock_bh(&tcp_lock);
- nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
- NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
- &ct->proto.tcp.state);
- read_unlock_bh(&tcp_lock);
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- read_unlock_bh(&tcp_lock);
- return -1;
-}
-
-static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
- [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
-};
-
-static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
-{
- struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
- struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
-
- /* updates could not contain anything about the private
- * protocol info, in that case skip the parsing */
- if (!attr)
- return 0;
-
- nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
- return -EINVAL;
-
- if (!tb[CTA_PROTOINFO_TCP_STATE-1])
- return -EINVAL;
-
- write_lock_bh(&tcp_lock);
- ct->proto.tcp.state =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
- write_unlock_bh(&tcp_lock);
-
- return 0;
-}
-#endif
-
-static unsigned int get_conntrack_index(const struct tcphdr *tcph)
-{
- if (tcph->rst) return TCP_RST_SET;
- else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
- else if (tcph->fin) return TCP_FIN_SET;
- else if (tcph->ack) return TCP_ACK_SET;
- else return TCP_NONE_SET;
-}
-
-/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
- in IP Filter' by Guido van Rooij.
-
- http://www.nluug.nl/events/sane2000/papers.html
- http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
-
- The boundaries and the conditions are changed according to RFC793:
- the packet must intersect the window (i.e. segments may be
- after the right or before the left edge) and thus receivers may ACK
- segments after the right edge of the window.
-
- td_maxend = max(sack + max(win,1)) seen in reply packets
- td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
- td_maxwin += seq + len - sender.td_maxend
- if seq + len > sender.td_maxend
- td_end = max(seq + len) seen in sent packets
-
- I. Upper bound for valid data: seq <= sender.td_maxend
- II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
- III. Upper bound for valid ack: sack <= receiver.td_end
- IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
-
- where sack is the highest right edge of sack block found in the packet.
-
- The upper bound limit for a valid ack is not ignored -
- we doesn't have to deal with fragments.
-*/
-
-static inline __u32 segment_seq_plus_len(__u32 seq,
- size_t len,
- struct iphdr *iph,
- struct tcphdr *tcph)
-{
- return (seq + len - (iph->ihl + tcph->doff)*4
- + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
-}
-
-/* Fixme: what about big packets? */
-#define MAXACKWINCONST 66000
-#define MAXACKWINDOW(sender) \
- ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
- : MAXACKWINCONST)
-
-/*
- * Simplified tcp_parse_options routine from tcp_input.c
- */
-static void tcp_options(const struct sk_buff *skb,
- struct iphdr *iph,
- struct tcphdr *tcph,
- struct ip_ct_tcp_state *state)
-{
- unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
- unsigned char *ptr;
- int length = (tcph->doff*4) - sizeof(struct tcphdr);
-
- if (!length)
- return;
-
- ptr = skb_header_pointer(skb,
- (iph->ihl * 4) + sizeof(struct tcphdr),
- length, buff);
- BUG_ON(ptr == NULL);
-
- state->td_scale =
- state->flags = 0;
-
- while (length > 0) {
- int opcode=*ptr++;
- int opsize;
-
- switch (opcode) {
- case TCPOPT_EOL:
- return;
- case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
- length--;
- continue;
- default:
- opsize=*ptr++;
- if (opsize < 2) /* "silly options" */
- return;
- if (opsize > length)
- break; /* don't parse partial options */
-
- if (opcode == TCPOPT_SACK_PERM
- && opsize == TCPOLEN_SACK_PERM)
- state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
- else if (opcode == TCPOPT_WINDOW
- && opsize == TCPOLEN_WINDOW) {
- state->td_scale = *(u_int8_t *)ptr;
-
- if (state->td_scale > 14) {
- /* See RFC1323 */
- state->td_scale = 14;
- }
- state->flags |=
- IP_CT_TCP_FLAG_WINDOW_SCALE;
- }
- ptr += opsize - 2;
- length -= opsize;
- }
- }
-}
-
-static void tcp_sack(const struct sk_buff *skb,
- struct iphdr *iph,
- struct tcphdr *tcph,
- __u32 *sack)
-{
- unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
- unsigned char *ptr;
- int length = (tcph->doff*4) - sizeof(struct tcphdr);
- __u32 tmp;
-
- if (!length)
- return;
-
- ptr = skb_header_pointer(skb,
- (iph->ihl * 4) + sizeof(struct tcphdr),
- length, buff);
- BUG_ON(ptr == NULL);
-
- /* Fast path for timestamp-only option */
- if (length == TCPOLEN_TSTAMP_ALIGNED*4
- && *(__be32 *)ptr ==
- __constant_htonl((TCPOPT_NOP << 24)
- | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8)
- | TCPOLEN_TIMESTAMP))
- return;
-
- while (length > 0) {
- int opcode=*ptr++;
- int opsize, i;
-
- switch (opcode) {
- case TCPOPT_EOL:
- return;
- case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
- length--;
- continue;
- default:
- opsize=*ptr++;
- if (opsize < 2) /* "silly options" */
- return;
- if (opsize > length)
- break; /* don't parse partial options */
-
- if (opcode == TCPOPT_SACK
- && opsize >= (TCPOLEN_SACK_BASE
- + TCPOLEN_SACK_PERBLOCK)
- && !((opsize - TCPOLEN_SACK_BASE)
- % TCPOLEN_SACK_PERBLOCK)) {
- for (i = 0;
- i < (opsize - TCPOLEN_SACK_BASE);
- i += TCPOLEN_SACK_PERBLOCK) {
- tmp = ntohl(*((__be32 *)(ptr+i)+1));
-
- if (after(tmp, *sack))
- *sack = tmp;
- }
- return;
- }
- ptr += opsize - 2;
- length -= opsize;
- }
- }
-}
-
-static int tcp_in_window(struct ip_ct_tcp *state,
- enum ip_conntrack_dir dir,
- unsigned int index,
- const struct sk_buff *skb,
- struct iphdr *iph,
- struct tcphdr *tcph)
-{
- struct ip_ct_tcp_state *sender = &state->seen[dir];
- struct ip_ct_tcp_state *receiver = &state->seen[!dir];
- __u32 seq, ack, sack, end, win, swin;
- int res;
-
- /*
- * Get the required data from the packet.
- */
- seq = ntohl(tcph->seq);
- ack = sack = ntohl(tcph->ack_seq);
- win = ntohs(tcph->window);
- end = segment_seq_plus_len(seq, skb->len, iph, tcph);
-
- if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
- tcp_sack(skb, iph, tcph, &sack);
-
- DEBUGP("tcp_in_window: START\n");
- DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
- "seq=%u ack=%u sack=%u win=%u end=%u\n",
- NIPQUAD(iph->saddr), ntohs(tcph->source),
- NIPQUAD(iph->daddr), ntohs(tcph->dest),
- seq, ack, sack, win, end);
- DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-
- if (sender->td_end == 0) {
- /*
- * Initialize sender data.
- */
- if (tcph->syn && tcph->ack) {
- /*
- * Outgoing SYN-ACK in reply to a SYN.
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, iph, tcph, sender);
- /*
- * RFC 1323:
- * Both sides must send the Window Scale option
- * to enable window scaling in either direction.
- */
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
- && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
- sender->td_scale =
- receiver->td_scale = 0;
- } else {
- /*
- * We are in the middle of a connection,
- * its history is lost for us.
- * Let's try to use the data from the packet.
- */
- sender->td_end = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
- sender->td_maxend = end + sender->td_maxwin;
- }
- } else if (((state->state == TCP_CONNTRACK_SYN_SENT
- && dir == IP_CT_DIR_ORIGINAL)
- || (state->state == TCP_CONNTRACK_SYN_RECV
- && dir == IP_CT_DIR_REPLY))
- && after(end, sender->td_end)) {
- /*
- * RFC 793: "if a TCP is reinitialized ... then it need
- * not wait at all; it must only be sure to use sequence
- * numbers larger than those recently used."
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, iph, tcph, sender);
- }
-
- if (!(tcph->ack)) {
- /*
- * If there is no ACK, just pretend it was set and OK.
- */
- ack = sack = receiver->td_end;
- } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
- (TCP_FLAG_ACK|TCP_FLAG_RST))
- && (ack == 0)) {
- /*
- * Broken TCP stacks, that set ACK in RST packets as well
- * with zero ack value.
- */
- ack = sack = receiver->td_end;
- }
-
- if (seq == end
- && (!tcph->rst
- || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
- /*
- * Packets contains no data: we assume it is valid
- * and check the ack value only.
- * However RST segments are always validated by their
- * SEQ number, except when seq == 0 (reset sent answering
- * SYN.
- */
- seq = end = sender->td_end;
-
- DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
- "seq=%u ack=%u sack =%u win=%u end=%u\n",
- NIPQUAD(iph->saddr), ntohs(tcph->source),
- NIPQUAD(iph->daddr), ntohs(tcph->dest),
- seq, ack, sack, win, end);
- DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-
- DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
- before(seq, sender->td_maxend + 1),
- after(end, sender->td_end - receiver->td_maxwin - 1),
- before(sack, receiver->td_end + 1),
- after(ack, receiver->td_end - MAXACKWINDOW(sender)));
-
- if (before(seq, sender->td_maxend + 1) &&
- after(end, sender->td_end - receiver->td_maxwin - 1) &&
- before(sack, receiver->td_end + 1) &&
- after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
- /*
- * Take into account window scaling (RFC 1323).
- */
- if (!tcph->syn)
- win <<= sender->td_scale;
-
- /*
- * Update sender data.
- */
- swin = win + (sack - ack);
- if (sender->td_maxwin < swin)
- sender->td_maxwin = swin;
- if (after(end, sender->td_end))
- sender->td_end = end;
- /*
- * Update receiver data.
- */
- if (after(end, sender->td_maxend))
- receiver->td_maxwin += end - sender->td_maxend;
- if (after(sack + win, receiver->td_maxend - 1)) {
- receiver->td_maxend = sack + win;
- if (win == 0)
- receiver->td_maxend++;
- }
-
- /*
- * Check retransmissions.
- */
- if (index == TCP_ACK_SET) {
- if (state->last_dir == dir
- && state->last_seq == seq
- && state->last_ack == ack
- && state->last_end == end
- && state->last_win == win)
- state->retrans++;
- else {
- state->last_dir = dir;
- state->last_seq = seq;
- state->last_ack = ack;
- state->last_end = end;
- state->last_win = win;
- state->retrans = 0;
- }
- }
- res = 1;
- } else {
- res = 0;
- if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
- ip_ct_tcp_be_liberal)
- res = 1;
- if (!res && LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: %s ",
- before(seq, sender->td_maxend + 1) ?
- after(end, sender->td_end - receiver->td_maxwin - 1) ?
- before(sack, receiver->td_end + 1) ?
- after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
- : "ACK is under the lower bound (possible overly delayed ACK)"
- : "ACK is over the upper bound (ACKed data not seen yet)"
- : "SEQ is under the lower bound (already ACKed data retransmitted)"
- : "SEQ is over the upper bound (over the window of the receiver)");
- }
-
- DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
- "receiver end=%u maxend=%u maxwin=%u\n",
- res, sender->td_end, sender->td_maxend, sender->td_maxwin,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
-
- return res;
-}
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-/* Update sender->td_end after NAT successfully mangled the packet */
-void ip_conntrack_tcp_update(struct sk_buff *skb,
- struct ip_conntrack *conntrack,
- enum ip_conntrack_dir dir)
-{
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
- __u32 end;
-#ifdef DEBUGP_VARS
- struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
- struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
-#endif
-
- end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
-
- write_lock_bh(&tcp_lock);
- /*
- * We have to worry for the ack in the reply packet only...
- */
- if (after(end, conntrack->proto.tcp.seen[dir].td_end))
- conntrack->proto.tcp.seen[dir].td_end = end;
- conntrack->proto.tcp.last_end = end;
- write_unlock_bh(&tcp_lock);
- DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-}
-
-#endif
-
-#define TH_FIN 0x01
-#define TH_SYN 0x02
-#define TH_RST 0x04
-#define TH_PUSH 0x08
-#define TH_ACK 0x10
-#define TH_URG 0x20
-#define TH_ECE 0x40
-#define TH_CWR 0x80
-
-/* table of valid flag combinations - ECE and CWR are always valid */
-static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
-{
- [TH_SYN] = 1,
- [TH_SYN|TH_PUSH] = 1,
- [TH_SYN|TH_URG] = 1,
- [TH_SYN|TH_PUSH|TH_URG] = 1,
- [TH_SYN|TH_ACK] = 1,
- [TH_SYN|TH_ACK|TH_PUSH] = 1,
- [TH_RST] = 1,
- [TH_RST|TH_ACK] = 1,
- [TH_RST|TH_ACK|TH_PUSH] = 1,
- [TH_FIN|TH_ACK] = 1,
- [TH_ACK] = 1,
- [TH_ACK|TH_PUSH] = 1,
- [TH_ACK|TH_URG] = 1,
- [TH_ACK|TH_URG|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_URG] = 1,
- [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
-};
-
-/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
-static int tcp_error(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr _tcph, *th;
- unsigned int tcplen = skb->len - iph->ihl * 4;
- u_int8_t tcpflags;
-
- /* Smaller that minimal TCP header? */
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: short packet ");
- return -NF_ACCEPT;
- }
-
- /* Not whole TCP header or malformed packet */
- if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: truncated/malformed packet ");
- return -NF_ACCEPT;
- }
-
- /* Checksum invalid? Ignore.
- * We skip checking packets on the outgoing path
- * because it is assumed to be correct.
- */
- /* FIXME: Source route IP option packets --RR */
- if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
- nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: bad TCP checksum ");
- return -NF_ACCEPT;
- }
-
- /* Check TCP flags. */
- tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
- if (!tcp_valid_flags[tcpflags]) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: invalid TCP flag combination ");
- return -NF_ACCEPT;
- }
-
- return NF_ACCEPT;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int tcp_packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- enum tcp_conntrack new_state, old_state;
- enum ip_conntrack_dir dir;
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th, _tcph;
- unsigned long timeout;
- unsigned int index;
-
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- BUG_ON(th == NULL);
-
- write_lock_bh(&tcp_lock);
- old_state = conntrack->proto.tcp.state;
- dir = CTINFO2DIR(ctinfo);
- index = get_conntrack_index(th);
- new_state = tcp_conntracks[dir][index][old_state];
-
- switch (new_state) {
- case TCP_CONNTRACK_IGNORE:
- /* Ignored packets:
- *
- * a) SYN in ORIGINAL
- * b) SYN/ACK in REPLY
- * c) ACK in reply direction after initial SYN in original.
- */
- if (index == TCP_SYNACK_SET
- && conntrack->proto.tcp.last_index == TCP_SYN_SET
- && conntrack->proto.tcp.last_dir != dir
- && ntohl(th->ack_seq) ==
- conntrack->proto.tcp.last_end) {
- /* This SYN/ACK acknowledges a SYN that we earlier
- * ignored as invalid. This means that the client and
- * the server are both in sync, while the firewall is
- * not. We kill this session and block the SYN/ACK so
- * that the client cannot but retransmit its SYN and
- * thus initiate a clean new session.
- */
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL,
- NULL, "ip_ct_tcp: "
- "killing out of sync session ");
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)
- conntrack);
- return -NF_DROP;
- }
- conntrack->proto.tcp.last_index = index;
- conntrack->proto.tcp.last_dir = dir;
- conntrack->proto.tcp.last_seq = ntohl(th->seq);
- conntrack->proto.tcp.last_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
-
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: invalid packet ignored ");
- return NF_ACCEPT;
- case TCP_CONNTRACK_MAX:
- /* Invalid packet */
- DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
- dir, get_conntrack_index(th),
- old_state);
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: invalid state ");
- return -NF_ACCEPT;
- case TCP_CONNTRACK_SYN_SENT:
- if (old_state < TCP_CONNTRACK_TIME_WAIT)
- break;
- if ((conntrack->proto.tcp.seen[dir].flags &
- IP_CT_TCP_FLAG_CLOSE_INIT)
- || after(ntohl(th->seq),
- conntrack->proto.tcp.seen[dir].td_end)) {
- /* Attempt to reopen a closed connection.
- * Delete this connection and look up again. */
- write_unlock_bh(&tcp_lock);
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)
- conntrack);
- return -NF_REPEAT;
- } else {
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL,
- NULL, "ip_ct_tcp: invalid SYN");
- return -NF_ACCEPT;
- }
- case TCP_CONNTRACK_CLOSE:
- if (index == TCP_RST_SET
- && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_SYN_SET)
- || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_ACK_SET))
- && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
- /* RST sent to invalid SYN or ACK we had let through
- * at a) and c) above:
- *
- * a) SYN was in window then
- * c) we hold a half-open connection.
- *
- * Delete our connection entry.
- * We skip window checking, because packet might ACK
- * segments we ignored. */
- goto in_window;
- }
- /* Just fall through */
- default:
- /* Keep compilers happy. */
- break;
- }
-
- if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
- skb, iph, th)) {
- write_unlock_bh(&tcp_lock);
- return -NF_ACCEPT;
- }
- in_window:
- /* From now on we have got in-window packets */
- conntrack->proto.tcp.last_index = index;
-
- DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
- "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
- NIPQUAD(iph->saddr), ntohs(th->source),
- NIPQUAD(iph->daddr), ntohs(th->dest),
- (th->syn ? 1 : 0), (th->ack ? 1 : 0),
- (th->fin ? 1 : 0), (th->rst ? 1 : 0),
- old_state, new_state);
-
- conntrack->proto.tcp.state = new_state;
- if (old_state != new_state
- && (new_state == TCP_CONNTRACK_FIN_WAIT
- || new_state == TCP_CONNTRACK_CLOSE))
- conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
- timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
- && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
- ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
- write_unlock_bh(&tcp_lock);
-
- ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
- if (new_state != old_state)
- ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
-
- if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
- /* If only reply is a RST, we can consider ourselves not to
- have an established connection: this is a fairly common
- problem case, so we can delete the conntrack
- immediately. --RR */
- if (th->rst) {
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)
- conntrack);
- return NF_ACCEPT;
- }
- } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
- && (old_state == TCP_CONNTRACK_SYN_RECV
- || old_state == TCP_CONNTRACK_ESTABLISHED)
- && new_state == TCP_CONNTRACK_ESTABLISHED) {
- /* Set ASSURED if we see see valid ack in ESTABLISHED
- after SYN_RECV or a valid answer for a picked up
- connection. */
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- }
- ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int tcp_new(struct ip_conntrack *conntrack,
- const struct sk_buff *skb)
-{
- enum tcp_conntrack new_state;
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th, _tcph;
-#ifdef DEBUGP_VARS
- struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
- struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
-#endif
-
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- BUG_ON(th == NULL);
-
- /* Don't need lock here: this conntrack not in circulation yet */
- new_state
- = tcp_conntracks[0][get_conntrack_index(th)]
- [TCP_CONNTRACK_NONE];
-
- /* Invalid: delete conntrack */
- if (new_state >= TCP_CONNTRACK_MAX) {
- DEBUGP("ip_ct_tcp: invalid new deleting.\n");
- return 0;
- }
-
- if (new_state == TCP_CONNTRACK_SYN_SENT) {
- /* SYN packet */
- conntrack->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- iph, th);
- conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
- conntrack->proto.tcp.seen[0].td_maxwin = 1;
- conntrack->proto.tcp.seen[0].td_maxend =
- conntrack->proto.tcp.seen[0].td_end;
-
- tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
- conntrack->proto.tcp.seen[1].flags = 0;
- } else if (ip_ct_tcp_loose == 0) {
- /* Don't try to pick up connections. */
- return 0;
- } else {
- /*
- * We are in the middle of a connection,
- * its history is lost for us.
- * Let's try to use the data from the packet.
- */
- conntrack->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- iph, th);
- conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
- conntrack->proto.tcp.seen[0].td_maxwin = 1;
- conntrack->proto.tcp.seen[0].td_maxend =
- conntrack->proto.tcp.seen[0].td_end +
- conntrack->proto.tcp.seen[0].td_maxwin;
- conntrack->proto.tcp.seen[0].td_scale = 0;
-
- /* We assume SACK and liberal window checking to handle
- * window scaling */
- conntrack->proto.tcp.seen[0].flags =
- conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
- IP_CT_TCP_FLAG_BE_LIBERAL;
- }
-
- conntrack->proto.tcp.seen[1].td_end = 0;
- conntrack->proto.tcp.seen[1].td_maxend = 0;
- conntrack->proto.tcp.seen[1].td_maxwin = 1;
- conntrack->proto.tcp.seen[1].td_scale = 0;
-
- /* tcp_packet will set them */
- conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
- conntrack->proto.tcp.last_index = TCP_NONE_SET;
-
- DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
- return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
-{
- .proto = IPPROTO_TCP,
- .name = "tcp",
- .pkt_to_tuple = tcp_pkt_to_tuple,
- .invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
- .print_conntrack = tcp_print_conntrack,
- .packet = tcp_packet,
- .new = tcp_new,
- .error = tcp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .to_nfattr = tcp_to_nfattr,
- .from_nfattr = nfattr_to_tcp,
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
deleted file mode 100644
index 14c30c646c7f..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/seq_file.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
-unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
-
-static int udp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct udphdr _hdr, *hp;
-
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->src.u.udp.port = hp->source;
- tuple->dst.u.udp.port = hp->dest;
-
- return 1;
-}
-
-static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->src.u.udp.port = orig->dst.u.udp.port;
- tuple->dst.u.udp.port = orig->src.u.udp.port;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int udp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.udp.port),
- ntohs(tuple->dst.u.udp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int udp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- return 0;
-}
-
-/* Returns verdict for packet, and may modify conntracktype */
-static int udp_packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- /* If we've seen traffic both ways, this is some kind of UDP
- stream. Extend timeout. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
- ip_ct_refresh_acct(conntrack, ctinfo, skb,
- ip_ct_udp_timeout_stream);
- /* Also, more likely to be important, and not a probe */
- if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- } else
- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
- return 1;
-}
-
-static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct iphdr *iph = skb->nh.iph;
- unsigned int udplen = skb->len - iph->ihl * 4;
- struct udphdr _hdr, *hdr;
-
- /* Header is too small? */
- hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
- if (hdr == NULL) {
- if (LOG_INVALID(IPPROTO_UDP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_udp: short packet ");
- return -NF_ACCEPT;
- }
-
- /* Truncated/malformed packets */
- if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
- if (LOG_INVALID(IPPROTO_UDP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_udp: truncated/malformed packet ");
- return -NF_ACCEPT;
- }
-
- /* Packet with no checksum */
- if (!hdr->check)
- return NF_ACCEPT;
-
- /* Checksum invalid? Ignore.
- * We skip checking packets on the outgoing path
- * because the checksum is assumed to be correct.
- * FIXME: Source route IP option packets --RR */
- if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
- nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
- if (LOG_INVALID(IPPROTO_UDP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_udp: bad UDP checksum ");
- return -NF_ACCEPT;
- }
-
- return NF_ACCEPT;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_udp =
-{
- .proto = IPPROTO_UDP,
- .name = "udp",
- .pkt_to_tuple = udp_pkt_to_tuple,
- .invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
- .print_conntrack = udp_print_conntrack,
- .packet = udp_packet,
- .new = udp_new,
- .error = udp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
deleted file mode 100644
index c59a962c1f61..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/* SIP extension for IP connection tracking.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_conntrack_ftp.c and other modules.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP connection tracking helper");
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of sip servers");
-
-static unsigned int sip_timeout = SIP_TIMEOUT;
-module_param(sip_timeout, uint, 0600);
-MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
-
-unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- const char **dptr);
-EXPORT_SYMBOL_GPL(ip_nat_sip_hook);
-
-unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp,
- const char *dptr);
-EXPORT_SYMBOL_GPL(ip_nat_sdp_hook);
-
-static int digits_len(const char *dptr, const char *limit, int *shift);
-static int epaddr_len(const char *dptr, const char *limit, int *shift);
-static int skp_digits_len(const char *dptr, const char *limit, int *shift);
-static int skp_epaddr_len(const char *dptr, const char *limit, int *shift);
-
-struct sip_header_nfo {
- const char *lname;
- const char *sname;
- const char *ln_str;
- size_t lnlen;
- size_t snlen;
- size_t ln_strlen;
- int case_sensitive;
- int (*match_len)(const char *, const char *, int *);
-};
-
-static struct sip_header_nfo ct_sip_hdrs[] = {
- [POS_REG_REQ_URI] = { /* SIP REGISTER request URI */
- .lname = "sip:",
- .lnlen = sizeof("sip:") - 1,
- .ln_str = ":",
- .ln_strlen = sizeof(":") - 1,
- .match_len = epaddr_len
- },
- [POS_REQ_URI] = { /* SIP request URI */
- .lname = "sip:",
- .lnlen = sizeof("sip:") - 1,
- .ln_str = "@",
- .ln_strlen = sizeof("@") - 1,
- .match_len = epaddr_len
- },
- [POS_FROM] = { /* SIP From header */
- .lname = "From:",
- .lnlen = sizeof("From:") - 1,
- .sname = "\r\nf:",
- .snlen = sizeof("\r\nf:") - 1,
- .ln_str = "sip:",
- .ln_strlen = sizeof("sip:") - 1,
- .match_len = skp_epaddr_len,
- },
- [POS_TO] = { /* SIP To header */
- .lname = "To:",
- .lnlen = sizeof("To:") - 1,
- .sname = "\r\nt:",
- .snlen = sizeof("\r\nt:") - 1,
- .ln_str = "sip:",
- .ln_strlen = sizeof("sip:") - 1,
- .match_len = skp_epaddr_len,
- },
- [POS_VIA] = { /* SIP Via header */
- .lname = "Via:",
- .lnlen = sizeof("Via:") - 1,
- .sname = "\r\nv:",
- .snlen = sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */
- .ln_str = "UDP ",
- .ln_strlen = sizeof("UDP ") - 1,
- .match_len = epaddr_len,
- },
- [POS_CONTACT] = { /* SIP Contact header */
- .lname = "Contact:",
- .lnlen = sizeof("Contact:") - 1,
- .sname = "\r\nm:",
- .snlen = sizeof("\r\nm:") - 1,
- .ln_str = "sip:",
- .ln_strlen = sizeof("sip:") - 1,
- .match_len = skp_epaddr_len
- },
- [POS_CONTENT] = { /* SIP Content length header */
- .lname = "Content-Length:",
- .lnlen = sizeof("Content-Length:") - 1,
- .sname = "\r\nl:",
- .snlen = sizeof("\r\nl:") - 1,
- .ln_str = ":",
- .ln_strlen = sizeof(":") - 1,
- .match_len = skp_digits_len
- },
- [POS_MEDIA] = { /* SDP media info */
- .case_sensitive = 1,
- .lname = "\nm=",
- .lnlen = sizeof("\nm=") - 1,
- .sname = "\rm=",
- .snlen = sizeof("\rm=") - 1,
- .ln_str = "audio ",
- .ln_strlen = sizeof("audio ") - 1,
- .match_len = digits_len
- },
- [POS_OWNER] = { /* SDP owner address*/
- .case_sensitive = 1,
- .lname = "\no=",
- .lnlen = sizeof("\no=") - 1,
- .sname = "\ro=",
- .snlen = sizeof("\ro=") - 1,
- .ln_str = "IN IP4 ",
- .ln_strlen = sizeof("IN IP4 ") - 1,
- .match_len = epaddr_len
- },
- [POS_CONNECTION] = { /* SDP connection info */
- .case_sensitive = 1,
- .lname = "\nc=",
- .lnlen = sizeof("\nc=") - 1,
- .sname = "\rc=",
- .snlen = sizeof("\rc=") - 1,
- .ln_str = "IN IP4 ",
- .ln_strlen = sizeof("IN IP4 ") - 1,
- .match_len = epaddr_len
- },
- [POS_SDP_HEADER] = { /* SDP version header */
- .case_sensitive = 1,
- .lname = "\nv=",
- .lnlen = sizeof("\nv=") - 1,
- .sname = "\rv=",
- .snlen = sizeof("\rv=") - 1,
- .ln_str = "=",
- .ln_strlen = sizeof("=") - 1,
- .match_len = digits_len
- }
-};
-
-/* get line lenght until first CR or LF seen. */
-int ct_sip_lnlen(const char *line, const char *limit)
-{
- const char *k = line;
-
- while ((line <= limit) && (*line == '\r' || *line == '\n'))
- line++;
-
- while (line <= limit) {
- if (*line == '\r' || *line == '\n')
- break;
- line++;
- }
- return line - k;
-}
-EXPORT_SYMBOL_GPL(ct_sip_lnlen);
-
-/* Linear string search, case sensitive. */
-const char *ct_sip_search(const char *needle, const char *haystack,
- size_t needle_len, size_t haystack_len,
- int case_sensitive)
-{
- const char *limit = haystack + (haystack_len - needle_len);
-
- while (haystack <= limit) {
- if (case_sensitive) {
- if (strncmp(haystack, needle, needle_len) == 0)
- return haystack;
- } else {
- if (strnicmp(haystack, needle, needle_len) == 0)
- return haystack;
- }
- haystack++;
- }
- return NULL;
-}
-EXPORT_SYMBOL_GPL(ct_sip_search);
-
-static int digits_len(const char *dptr, const char *limit, int *shift)
-{
- int len = 0;
- while (dptr <= limit && isdigit(*dptr)) {
- dptr++;
- len++;
- }
- return len;
-}
-
-/* get digits lenght, skiping blank spaces. */
-static int skp_digits_len(const char *dptr, const char *limit, int *shift)
-{
- for (; dptr <= limit && *dptr == ' '; dptr++)
- (*shift)++;
-
- return digits_len(dptr, limit, shift);
-}
-
-/* Simple ipaddr parser.. */
-static int parse_ipaddr(const char *cp, const char **endp,
- __be32 *ipaddr, const char *limit)
-{
- unsigned long int val;
- int i, digit = 0;
-
- for (i = 0, *ipaddr = 0; cp <= limit && i < 4; i++) {
- digit = 0;
- if (!isdigit(*cp))
- break;
-
- val = simple_strtoul(cp, (char **)&cp, 10);
- if (val > 0xFF)
- return -1;
-
- ((u_int8_t *)ipaddr)[i] = val;
- digit = 1;
-
- if (*cp != '.')
- break;
- cp++;
- }
- if (!digit)
- return -1;
-
- if (endp)
- *endp = cp;
-
- return 0;
-}
-
-/* skip ip address. returns it lenght. */
-static int epaddr_len(const char *dptr, const char *limit, int *shift)
-{
- const char *aux = dptr;
- __be32 ip;
-
- if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) {
- DEBUGP("ip: %s parse failed.!\n", dptr);
- return 0;
- }
-
- /* Port number */
- if (*dptr == ':') {
- dptr++;
- dptr += digits_len(dptr, limit, shift);
- }
- return dptr - aux;
-}
-
-/* get address length, skiping user info. */
-static int skp_epaddr_len(const char *dptr, const char *limit, int *shift)
-{
- int s = *shift;
-
- /* Search for @, but stop at the end of the line.
- * We are inside a sip: URI, so we don't need to worry about
- * continuation lines. */
- while (dptr <= limit &&
- *dptr != '@' && *dptr != '\r' && *dptr != '\n') {
- (*shift)++;
- dptr++;
- }
-
- if (dptr <= limit && *dptr == '@') {
- dptr++;
- (*shift)++;
- } else
- *shift = s;
-
- return epaddr_len(dptr, limit, shift);
-}
-
-/* Returns 0 if not found, -1 error parsing. */
-int ct_sip_get_info(const char *dptr, size_t dlen,
- unsigned int *matchoff,
- unsigned int *matchlen,
- enum sip_header_pos pos)
-{
- struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos];
- const char *limit, *aux, *k = dptr;
- int shift = 0;
-
- limit = dptr + (dlen - hnfo->lnlen);
-
- while (dptr <= limit) {
- if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) &&
- (hnfo->sname == NULL ||
- strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) {
- dptr++;
- continue;
- }
- aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen,
- ct_sip_lnlen(dptr, limit),
- hnfo->case_sensitive);
- if (!aux) {
- DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str,
- hnfo->lname);
- return -1;
- }
- aux += hnfo->ln_strlen;
-
- *matchlen = hnfo->match_len(aux, limit, &shift);
- if (!*matchlen)
- return -1;
-
- *matchoff = (aux - k) + shift;
-
- DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname,
- *matchlen);
- return 1;
- }
- DEBUGP("%s header not found.\n", hnfo->lname);
- return 0;
-}
-EXPORT_SYMBOL_GPL(ct_sip_get_info);
-
-static int set_expected_rtp(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- __be32 ipaddr, u_int16_t port,
- const char *dptr)
-{
- struct ip_conntrack_expect *exp;
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- int ret;
- typeof(ip_nat_sdp_hook) ip_nat_sdp;
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL)
- return NF_DROP;
-
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.udp.port = 0;
- exp->tuple.dst.ip = ipaddr;
- exp->tuple.dst.u.udp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_UDP;
-
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.udp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.udp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
-
- exp->expectfn = NULL;
- exp->flags = 0;
-
- ip_nat_sdp = rcu_dereference(ip_nat_sdp_hook);
- if (ip_nat_sdp)
- ret = ip_nat_sdp(pskb, ctinfo, exp, dptr);
- else {
- if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- else
- ret = NF_ACCEPT;
- }
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-static int sip_help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dataoff, datalen;
- const char *dptr;
- int ret = NF_ACCEPT;
- int matchoff, matchlen;
- __be32 ipaddr;
- u_int16_t port;
- typeof(ip_nat_sip_hook) ip_nat_sip;
-
- /* No Data ? */
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- if (dataoff >= (*pskb)->len) {
- DEBUGP("skb->len = %u\n", (*pskb)->len);
- return NF_ACCEPT;
- }
-
- ip_ct_refresh(ct, *pskb, sip_timeout * HZ);
-
- if (!skb_is_nonlinear(*pskb))
- dptr = (*pskb)->data + dataoff;
- else {
- DEBUGP("Copy of skbuff not supported yet.\n");
- goto out;
- }
-
- ip_nat_sip = rcu_dereference(ip_nat_sip_hook);
- if (ip_nat_sip) {
- if (!ip_nat_sip(pskb, ctinfo, ct, &dptr)) {
- ret = NF_DROP;
- goto out;
- }
- }
-
- /* After this point NAT, could have mangled skb, so
- we need to recalculate payload lenght. */
- datalen = (*pskb)->len - dataoff;
-
- if (datalen < (sizeof("SIP/2.0 200") - 1))
- goto out;
-
- /* RTP info only in some SDP pkts */
- if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 &&
- memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) {
- goto out;
- }
- /* Get ip and port address from SDP packet. */
- if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
- POS_CONNECTION) > 0) {
-
- /* We'll drop only if there are parse problems. */
- if (parse_ipaddr(dptr + matchoff, NULL, &ipaddr,
- dptr + datalen) < 0) {
- ret = NF_DROP;
- goto out;
- }
- if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
- POS_MEDIA) > 0) {
-
- port = simple_strtoul(dptr + matchoff, NULL, 10);
- if (port < 1024) {
- ret = NF_DROP;
- goto out;
- }
- ret = set_expected_rtp(pskb, ct, ctinfo,
- ipaddr, port, dptr);
- }
- }
-out:
- return ret;
-}
-
-static struct ip_conntrack_helper sip[MAX_PORTS];
-static char sip_names[MAX_PORTS][10];
-
-static void fini(void)
-{
- int i;
- for (i = 0; i < ports_c; i++) {
- DEBUGP("unregistering helper for port %d\n", ports[i]);
- ip_conntrack_helper_unregister(&sip[i]);
- }
-}
-
-static int __init init(void)
-{
- int i, ret;
- char *tmpname;
-
- if (ports_c == 0)
- ports[ports_c++] = SIP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- /* Create helper structure */
- memset(&sip[i], 0, sizeof(struct ip_conntrack_helper));
-
- sip[i].tuple.dst.protonum = IPPROTO_UDP;
- sip[i].tuple.src.u.udp.port = htons(ports[i]);
- sip[i].mask.src.u.udp.port = htons(0xFFFF);
- sip[i].mask.dst.protonum = 0xFF;
- sip[i].max_expected = 2;
- sip[i].timeout = 3 * 60; /* 3 minutes */
- sip[i].me = THIS_MODULE;
- sip[i].help = sip_help;
-
- tmpname = &sip_names[i][0];
- if (ports[i] == SIP_PORT)
- sprintf(tmpname, "sip");
- else
- sprintf(tmpname, "sip-%d", i);
- sip[i].name = tmpname;
-
- DEBUGP("port #%d: %d\n", i, ports[i]);
-
- ret = ip_conntrack_helper_register(&sip[i]);
- if (ret) {
- printk("ERROR registering helper for port %d\n",
- ports[i]);
- fini();
- return ret;
- }
- }
- return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
deleted file mode 100644
index 56b2f7546d1e..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ /dev/null
@@ -1,962 +0,0 @@
-/* This file contains all the functions required for the standalone
- ip_conntrack module.
-
- These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/percpu.h>
-#ifdef CONFIG_SYSCTL
-#include <linux/sysctl.h>
-#endif
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_LICENSE("GPL");
-
-extern atomic_t ip_conntrack_count;
-DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int kill_proto(struct ip_conntrack *i, void *data)
-{
- return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
- *((u_int8_t *) data));
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_protocol *proto)
-{
- seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
- NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
- return proto->print_tuple(s, tuple);
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static unsigned int
-seq_print_counters(struct seq_file *s,
- const struct ip_conntrack_counter *counter)
-{
- return seq_printf(s, "packets=%llu bytes=%llu ",
- (unsigned long long)counter->packets,
- (unsigned long long)counter->bytes);
-}
-#else
-#define seq_print_counters(x, y) 0
-#endif
-
-struct ct_iter_state {
- unsigned int bucket;
-};
-
-static struct list_head *ct_get_first(struct seq_file *seq)
-{
- struct ct_iter_state *st = seq->private;
-
- for (st->bucket = 0;
- st->bucket < ip_conntrack_htable_size;
- st->bucket++) {
- if (!list_empty(&ip_conntrack_hash[st->bucket]))
- return ip_conntrack_hash[st->bucket].next;
- }
- return NULL;
-}
-
-static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
-{
- struct ct_iter_state *st = seq->private;
-
- head = head->next;
- while (head == &ip_conntrack_hash[st->bucket]) {
- if (++st->bucket >= ip_conntrack_htable_size)
- return NULL;
- head = ip_conntrack_hash[st->bucket].next;
- }
- return head;
-}
-
-static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
-{
- struct list_head *head = ct_get_first(seq);
-
- if (head)
- while (pos && (head = ct_get_next(seq, head)))
- pos--;
- return pos ? NULL : head;
-}
-
-static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
-{
- read_lock_bh(&ip_conntrack_lock);
- return ct_get_idx(seq, *pos);
-}
-
-static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- (*pos)++;
- return ct_get_next(s, v);
-}
-
-static void ct_seq_stop(struct seq_file *s, void *v)
-{
- read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int ct_seq_show(struct seq_file *s, void *v)
-{
- const struct ip_conntrack_tuple_hash *hash = v;
- const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
- struct ip_conntrack_protocol *proto;
-
- IP_NF_ASSERT(conntrack);
-
- /* we only want to print DIR_ORIGINAL */
- if (DIRECTION(hash))
- return 0;
-
- proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
- IP_NF_ASSERT(proto);
-
- if (seq_printf(s, "%-8s %u %ld ",
- proto->name,
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
- timer_pending(&conntrack->timeout)
- ? (long)(conntrack->timeout.expires - jiffies)/HZ
- : 0) != 0)
- return -ENOSPC;
-
- if (proto->print_conntrack(s, conntrack))
- return -ENOSPC;
-
- if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- proto))
- return -ENOSPC;
-
- if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
- return -ENOSPC;
-
- if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
- if (seq_printf(s, "[UNREPLIED] "))
- return -ENOSPC;
-
- if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
- proto))
- return -ENOSPC;
-
- if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
- return -ENOSPC;
-
- if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
- if (seq_printf(s, "[ASSURED] "))
- return -ENOSPC;
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (seq_printf(s, "mark=%u ", conntrack->mark))
- return -ENOSPC;
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
- if (seq_printf(s, "secmark=%u ", conntrack->secmark))
- return -ENOSPC;
-#endif
-
- if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
- return -ENOSPC;
-
- return 0;
-}
-
-static struct seq_operations ct_seq_ops = {
- .start = ct_seq_start,
- .next = ct_seq_next,
- .stop = ct_seq_stop,
- .show = ct_seq_show
-};
-
-static int ct_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- struct ct_iter_state *st;
- int ret;
-
- st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
- if (st == NULL)
- return -ENOMEM;
- ret = seq_open(file, &ct_seq_ops);
- if (ret)
- goto out_free;
- seq = file->private_data;
- seq->private = st;
- memset(st, 0, sizeof(struct ct_iter_state));
- return ret;
-out_free:
- kfree(st);
- return ret;
-}
-
-static const struct file_operations ct_file_ops = {
- .owner = THIS_MODULE,
- .open = ct_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-/* expects */
-static void *exp_seq_start(struct seq_file *s, loff_t *pos)
-{
- struct list_head *e = &ip_conntrack_expect_list;
- loff_t i;
-
- /* strange seq_file api calls stop even if we fail,
- * thus we need to grab lock since stop unlocks */
- read_lock_bh(&ip_conntrack_lock);
-
- if (list_empty(e))
- return NULL;
-
- for (i = 0; i <= *pos; i++) {
- e = e->next;
- if (e == &ip_conntrack_expect_list)
- return NULL;
- }
- return e;
-}
-
-static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct list_head *e = v;
-
- ++*pos;
- e = e->next;
-
- if (e == &ip_conntrack_expect_list)
- return NULL;
-
- return e;
-}
-
-static void exp_seq_stop(struct seq_file *s, void *v)
-{
- read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int exp_seq_show(struct seq_file *s, void *v)
-{
- struct ip_conntrack_expect *expect = v;
-
- if (expect->timeout.function)
- seq_printf(s, "%ld ", timer_pending(&expect->timeout)
- ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
- else
- seq_printf(s, "- ");
-
- seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
-
- print_tuple(s, &expect->tuple,
- __ip_conntrack_proto_find(expect->tuple.dst.protonum));
- return seq_putc(s, '\n');
-}
-
-static struct seq_operations exp_seq_ops = {
- .start = exp_seq_start,
- .next = exp_seq_next,
- .stop = exp_seq_stop,
- .show = exp_seq_show
-};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &exp_seq_ops);
-}
-
-static const struct file_operations exp_file_ops = {
- .owner = THIS_MODULE,
- .open = exp_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
-static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
-{
- int cpu;
-
- if (*pos == 0)
- return SEQ_START_TOKEN;
-
- for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
- if (!cpu_possible(cpu))
- continue;
- *pos = cpu+1;
- return &per_cpu(ip_conntrack_stat, cpu);
- }
-
- return NULL;
-}
-
-static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- int cpu;
-
- for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
- if (!cpu_possible(cpu))
- continue;
- *pos = cpu+1;
- return &per_cpu(ip_conntrack_stat, cpu);
- }
-
- return NULL;
-}
-
-static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int ct_cpu_seq_show(struct seq_file *seq, void *v)
-{
- unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
- struct ip_conntrack_stat *st = v;
-
- if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
- return 0;
- }
-
- seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
- "%08x %08x %08x %08x %08x %08x %08x %08x \n",
- nr_conntracks,
- st->searched,
- st->found,
- st->new,
- st->invalid,
- st->ignore,
- st->delete,
- st->delete_list,
- st->insert,
- st->insert_failed,
- st->drop,
- st->early_drop,
- st->error,
-
- st->expect_new,
- st->expect_create,
- st->expect_delete
- );
- return 0;
-}
-
-static struct seq_operations ct_cpu_seq_ops = {
- .start = ct_cpu_seq_start,
- .next = ct_cpu_seq_next,
- .stop = ct_cpu_seq_stop,
- .show = ct_cpu_seq_show,
-};
-
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ct_cpu_seq_ops);
-}
-
-static const struct file_operations ct_cpu_seq_fops = {
- .owner = THIS_MODULE,
- .open = ct_cpu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-#endif
-
-static unsigned int ip_confirm(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* We've seen it coming out the other side: confirm it */
- return ip_conntrack_confirm(pskb);
-}
-
-static unsigned int ip_conntrack_help(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- /* This is where we call the helper: as the packet goes out. */
- ct = ip_conntrack_get(*pskb, &ctinfo);
- if (ct && ct->helper && ctinfo != IP_CT_RELATED + IP_CT_IS_REPLY) {
- unsigned int ret;
- ret = ct->helper->help(pskb, ct, ctinfo);
- if (ret != NF_ACCEPT)
- return ret;
- }
- return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_defrag(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
-#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
- /* Previously seen (loopback)? Ignore. Do this before
- fragment check. */
- if ((*pskb)->nfct)
- return NF_ACCEPT;
-#endif
-
- /* Gather fragments. */
- if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
- *pskb = ip_ct_gather_frags(*pskb,
- hooknum == NF_IP_PRE_ROUTING ?
- IP_DEFRAG_CONNTRACK_IN :
- IP_DEFRAG_CONNTRACK_OUT);
- if (!*pskb)
- return NF_STOLEN;
- }
- return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_local(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
- if (net_ratelimit())
- printk("ipt_hook: happy cracking.\n");
- return NF_ACCEPT;
- }
- return ip_conntrack_in(hooknum, pskb, in, out, okfn);
-}
-
-/* Connection tracking may drop packets, but never alters them, so
- make it the first hook. */
-static struct nf_hook_ops ip_conntrack_ops[] = {
- {
- .hook = ip_conntrack_defrag,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
- },
- {
- .hook = ip_conntrack_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK,
- },
- {
- .hook = ip_conntrack_defrag,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
- },
- {
- .hook = ip_conntrack_local,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_CONNTRACK,
- },
- {
- .hook = ip_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
- },
- {
- .hook = ip_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
- },
- {
- .hook = ip_confirm,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
- },
- {
- .hook = ip_confirm,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
- },
-};
-
-/* Sysctl support */
-
-int ip_conntrack_checksum __read_mostly = 1;
-
-#ifdef CONFIG_SYSCTL
-
-/* From ip_conntrack_core.c */
-extern int ip_conntrack_max;
-extern unsigned int ip_conntrack_htable_size;
-
-/* From ip_conntrack_proto_tcp.c */
-extern unsigned int ip_ct_tcp_timeout_syn_sent;
-extern unsigned int ip_ct_tcp_timeout_syn_recv;
-extern unsigned int ip_ct_tcp_timeout_established;
-extern unsigned int ip_ct_tcp_timeout_fin_wait;
-extern unsigned int ip_ct_tcp_timeout_close_wait;
-extern unsigned int ip_ct_tcp_timeout_last_ack;
-extern unsigned int ip_ct_tcp_timeout_time_wait;
-extern unsigned int ip_ct_tcp_timeout_close;
-extern unsigned int ip_ct_tcp_timeout_max_retrans;
-extern int ip_ct_tcp_loose;
-extern int ip_ct_tcp_be_liberal;
-extern int ip_ct_tcp_max_retrans;
-
-/* From ip_conntrack_proto_udp.c */
-extern unsigned int ip_ct_udp_timeout;
-extern unsigned int ip_ct_udp_timeout_stream;
-
-/* From ip_conntrack_proto_icmp.c */
-extern unsigned int ip_ct_icmp_timeout;
-
-/* From ip_conntrack_proto_generic.c */
-extern unsigned int ip_ct_generic_timeout;
-
-/* Log invalid packets of a given protocol */
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-
-static ctl_table ip_ct_sysctl_table[] = {
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
- .procname = "ip_conntrack_max",
- .data = &ip_conntrack_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT,
- .procname = "ip_conntrack_count",
- .data = &ip_conntrack_count,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
- .procname = "ip_conntrack_buckets",
- .data = &ip_conntrack_htable_size,
- .maxlen = sizeof(unsigned int),
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
- .procname = "ip_conntrack_checksum",
- .data = &ip_conntrack_checksum,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
- .procname = "ip_conntrack_tcp_timeout_syn_sent",
- .data = &ip_ct_tcp_timeout_syn_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
- .procname = "ip_conntrack_tcp_timeout_syn_recv",
- .data = &ip_ct_tcp_timeout_syn_recv,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
- .procname = "ip_conntrack_tcp_timeout_established",
- .data = &ip_ct_tcp_timeout_established,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
- .procname = "ip_conntrack_tcp_timeout_fin_wait",
- .data = &ip_ct_tcp_timeout_fin_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
- .procname = "ip_conntrack_tcp_timeout_close_wait",
- .data = &ip_ct_tcp_timeout_close_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
- .procname = "ip_conntrack_tcp_timeout_last_ack",
- .data = &ip_ct_tcp_timeout_last_ack,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
- .procname = "ip_conntrack_tcp_timeout_time_wait",
- .data = &ip_ct_tcp_timeout_time_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
- .procname = "ip_conntrack_tcp_timeout_close",
- .data = &ip_ct_tcp_timeout_close,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
- .procname = "ip_conntrack_udp_timeout",
- .data = &ip_ct_udp_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
- .procname = "ip_conntrack_udp_timeout_stream",
- .data = &ip_ct_udp_timeout_stream,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
- .procname = "ip_conntrack_icmp_timeout",
- .data = &ip_ct_icmp_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
- .procname = "ip_conntrack_generic_timeout",
- .data = &ip_ct_generic_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
- .procname = "ip_conntrack_log_invalid",
- .data = &ip_ct_log_invalid,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &log_invalid_proto_min,
- .extra2 = &log_invalid_proto_max,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
- .procname = "ip_conntrack_tcp_timeout_max_retrans",
- .data = &ip_ct_tcp_timeout_max_retrans,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
- .procname = "ip_conntrack_tcp_loose",
- .data = &ip_ct_tcp_loose,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
- .procname = "ip_conntrack_tcp_be_liberal",
- .data = &ip_ct_tcp_be_liberal,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
- .procname = "ip_conntrack_tcp_max_retrans",
- .data = &ip_ct_tcp_max_retrans,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- { .ctl_name = 0 }
-};
-
-#define NET_IP_CONNTRACK_MAX 2089
-
-static ctl_table ip_ct_netfilter_table[] = {
- {
- .ctl_name = NET_IPV4_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = ip_ct_sysctl_table,
- },
- {
- .ctl_name = NET_IP_CONNTRACK_MAX,
- .procname = "ip_conntrack_max",
- .data = &ip_conntrack_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = ip_ct_netfilter_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ip_ct_ipv4_table,
- },
- { .ctl_name = 0 }
-};
-
-EXPORT_SYMBOL(ip_ct_log_invalid);
-#endif /* CONFIG_SYSCTL */
-
-/* FIXME: Allow NULL functions and sub in pointers to generic for
- them. --RR */
-int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
-{
- int ret = 0;
-
- write_lock_bh(&ip_conntrack_lock);
- if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
- ret = -EBUSY;
- goto out;
- }
- rcu_assign_pointer(ip_ct_protos[proto->proto], proto);
- out:
- write_unlock_bh(&ip_conntrack_lock);
- return ret;
-}
-
-void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
-{
- write_lock_bh(&ip_conntrack_lock);
- rcu_assign_pointer(ip_ct_protos[proto->proto],
- &ip_conntrack_generic_protocol);
- write_unlock_bh(&ip_conntrack_lock);
- synchronize_rcu();
-
- /* Remove all contrack entries for this protocol */
- ip_ct_iterate_cleanup(kill_proto, &proto->proto);
-}
-
-static int __init ip_conntrack_standalone_init(void)
-{
-#ifdef CONFIG_PROC_FS
- struct proc_dir_entry *proc, *proc_exp, *proc_stat;
-#endif
- int ret = 0;
-
- ret = ip_conntrack_init();
- if (ret < 0)
- return ret;
-
-#ifdef CONFIG_PROC_FS
- ret = -ENOMEM;
- proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
- if (!proc) goto cleanup_init;
-
- proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
- &exp_file_ops);
- if (!proc_exp) goto cleanup_proc;
-
- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
- if (!proc_stat)
- goto cleanup_proc_exp;
-
- proc_stat->proc_fops = &ct_cpu_seq_fops;
- proc_stat->owner = THIS_MODULE;
-#endif
-
- ret = nf_register_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
- if (ret < 0) {
- printk("ip_conntrack: can't register hooks.\n");
- goto cleanup_proc_stat;
- }
-#ifdef CONFIG_SYSCTL
- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
- if (ip_ct_sysctl_header == NULL) {
- printk("ip_conntrack: can't register to sysctl.\n");
- ret = -ENOMEM;
- goto cleanup_hooks;
- }
-#endif
- return ret;
-
-#ifdef CONFIG_SYSCTL
- cleanup_hooks:
- nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-#endif
- cleanup_proc_stat:
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("ip_conntrack", proc_net_stat);
- cleanup_proc_exp:
- proc_net_remove("ip_conntrack_expect");
- cleanup_proc:
- proc_net_remove("ip_conntrack");
- cleanup_init:
-#endif /* CONFIG_PROC_FS */
- ip_conntrack_cleanup();
- return ret;
-}
-
-static void __exit ip_conntrack_standalone_fini(void)
-{
- synchronize_net();
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ip_ct_sysctl_header);
-#endif
- nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("ip_conntrack", proc_net_stat);
- proc_net_remove("ip_conntrack_expect");
- proc_net_remove("ip_conntrack");
-#endif /* CONFIG_PROC_FS */
- ip_conntrack_cleanup();
-}
-
-module_init(ip_conntrack_standalone_init);
-module_exit(ip_conntrack_standalone_fini);
-
-/* Some modules need us, but don't depend directly on any symbol.
- They should call this. */
-void need_conntrack(void)
-{
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-EXPORT_SYMBOL_GPL(ip_conntrack_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
-EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
-EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
-EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
-#endif
-EXPORT_SYMBOL(ip_conntrack_protocol_register);
-EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
-EXPORT_SYMBOL(ip_ct_get_tuple);
-EXPORT_SYMBOL(invert_tuplepr);
-EXPORT_SYMBOL(ip_conntrack_alter_reply);
-EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(need_conntrack);
-EXPORT_SYMBOL(ip_conntrack_helper_register);
-EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(__ip_ct_refresh_acct);
-
-EXPORT_SYMBOL(ip_conntrack_expect_alloc);
-EXPORT_SYMBOL(ip_conntrack_expect_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
-EXPORT_SYMBOL(ip_conntrack_expect_related);
-EXPORT_SYMBOL(ip_conntrack_unexpect_related);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
-
-EXPORT_SYMBOL(ip_conntrack_tuple_taken);
-EXPORT_SYMBOL(ip_ct_gather_frags);
-EXPORT_SYMBOL(ip_conntrack_htable_size);
-EXPORT_SYMBOL(ip_conntrack_lock);
-EXPORT_SYMBOL(ip_conntrack_hash);
-EXPORT_SYMBOL(ip_conntrack_untracked);
-EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-EXPORT_SYMBOL(ip_conntrack_tcp_update);
-#endif
-
-EXPORT_SYMBOL_GPL(ip_conntrack_flush);
-EXPORT_SYMBOL_GPL(__ip_conntrack_find);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_alloc);
-EXPORT_SYMBOL_GPL(ip_conntrack_free);
-EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert);
-
-EXPORT_SYMBOL_GPL(ip_ct_remove_expectations);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_checksum);
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
-EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
-#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
deleted file mode 100644
index 76e175e7a972..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * - port to newnat API
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp connection tracking helper");
-MODULE_LICENSE("GPL");
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of tftp servers");
-
-#if 0
-#define DEBUGP(format, args...) printk("%s:%s:" format, \
- __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
-
-static int tftp_help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct tftphdr _tftph, *tfh;
- struct ip_conntrack_expect *exp;
- unsigned int ret = NF_ACCEPT;
- typeof(ip_nat_tftp_hook) ip_nat_tftp;
-
- tfh = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
- sizeof(_tftph), &_tftph);
- if (tfh == NULL)
- return NF_ACCEPT;
-
- switch (ntohs(tfh->opcode)) {
- /* RRQ and WRQ works the same way */
- case TFTP_OPCODE_READ:
- case TFTP_OPCODE_WRITE:
- DEBUGP("");
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL)
- return NF_DROP;
-
- exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->mask.src.ip = htonl(0xffffffff);
- exp->mask.src.u.udp.port = 0;
- exp->mask.dst.ip = htonl(0xffffffff);
- exp->mask.dst.u.udp.port = htons(0xffff);
- exp->mask.dst.protonum = 0xff;
- exp->expectfn = NULL;
- exp->flags = 0;
-
- DEBUGP("expect: ");
- DUMP_TUPLE(&exp->tuple);
- DUMP_TUPLE(&exp->mask);
- ip_nat_tftp = rcu_dereference(ip_nat_tftp_hook);
- if (ip_nat_tftp)
- ret = ip_nat_tftp(pskb, ctinfo, exp);
- else if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- ip_conntrack_expect_put(exp);
- break;
- case TFTP_OPCODE_DATA:
- case TFTP_OPCODE_ACK:
- DEBUGP("Data/ACK opcode\n");
- break;
- case TFTP_OPCODE_ERROR:
- DEBUGP("Error opcode\n");
- break;
- default:
- DEBUGP("Unknown opcode\n");
- }
- return NF_ACCEPT;
-}
-
-static struct ip_conntrack_helper tftp[MAX_PORTS];
-static char tftp_names[MAX_PORTS][sizeof("tftp-65535")];
-
-static void ip_conntrack_tftp_fini(void)
-{
- int i;
-
- for (i = 0 ; i < ports_c; i++) {
- DEBUGP("unregistering helper for port %d\n",
- ports[i]);
- ip_conntrack_helper_unregister(&tftp[i]);
- }
-}
-
-static int __init ip_conntrack_tftp_init(void)
-{
- int i, ret;
- char *tmpname;
-
- if (ports_c == 0)
- ports[ports_c++] = TFTP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- /* Create helper structure */
- memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
-
- tftp[i].tuple.dst.protonum = IPPROTO_UDP;
- tftp[i].tuple.src.u.udp.port = htons(ports[i]);
- tftp[i].mask.dst.protonum = 0xFF;
- tftp[i].mask.src.u.udp.port = htons(0xFFFF);
- tftp[i].max_expected = 1;
- tftp[i].timeout = 5 * 60; /* 5 minutes */
- tftp[i].me = THIS_MODULE;
- tftp[i].help = tftp_help;
-
- tmpname = &tftp_names[i][0];
- if (ports[i] == TFTP_PORT)
- sprintf(tmpname, "tftp");
- else
- sprintf(tmpname, "tftp-%d", i);
- tftp[i].name = tmpname;
-
- DEBUGP("port #%d: %d\n", i, ports[i]);
-
- ret=ip_conntrack_helper_register(&tftp[i]);
- if (ret) {
- printk("ERROR registering helper for port %d\n",
- ports[i]);
- ip_conntrack_tftp_fini();
- return(ret);
- }
- }
- return(0);
-}
-
-module_init(ip_conntrack_tftp_init);
-module_exit(ip_conntrack_tftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
deleted file mode 100644
index 85df1a9aed33..000000000000
--- a/net/ipv4/netfilter/ip_nat_amanda.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Amanda extension for TCP NAT alteration.
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on a copy of HW's ip_nat_irc.c as well as other modules
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Module load syntax:
- * insmod ip_nat_amanda.o
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp)
-{
- char buffer[sizeof("65535")];
- u_int16_t port;
- unsigned int ret;
-
- /* Connection comes from client. */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = IP_CT_DIR_ORIGINAL;
-
- /* When you see the packet, we need to NAT it the same as the
- * this one (ie. same IP: it will be TCP and master is UDP). */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- sprintf(buffer, "%u", port);
- ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
- matchoff, matchlen,
- buffer, strlen(buffer));
- if (ret != NF_ACCEPT)
- ip_conntrack_unexpect_related(exp);
- return ret;
-}
-
-static void __exit ip_nat_amanda_fini(void)
-{
- rcu_assign_pointer(ip_nat_amanda_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init ip_nat_amanda_init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_amanda_hook));
- rcu_assign_pointer(ip_nat_amanda_hook, help);
- return 0;
-}
-
-module_init(ip_nat_amanda_init);
-module_exit(ip_nat_amanda_fini);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
deleted file mode 100644
index 40737fdbe9a7..000000000000
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ /dev/null
@@ -1,634 +0,0 @@
-/* NAT for netfilter; shared with compatibility layer. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h> /* For tcp_prot in getorigdst */
-#include <linux/icmp.h>
-#include <linux/udp.h>
-#include <linux/jhash.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_nat_lock);
-
-/* Calculated at init based on memory size */
-static unsigned int ip_nat_htable_size;
-
-static struct list_head *bysource;
-
-#define MAX_IP_NAT_PROTO 256
-static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
-
-static inline struct ip_nat_protocol *
-__ip_nat_proto_find(u_int8_t protonum)
-{
- return rcu_dereference(ip_nat_protos[protonum]);
-}
-
-struct ip_nat_protocol *
-ip_nat_proto_find_get(u_int8_t protonum)
-{
- struct ip_nat_protocol *p;
-
- rcu_read_lock();
- p = __ip_nat_proto_find(protonum);
- if (!try_module_get(p->me))
- p = &ip_nat_unknown_protocol;
- rcu_read_unlock();
-
- return p;
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
-
-void
-ip_nat_proto_put(struct ip_nat_protocol *p)
-{
- module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_put);
-
-/* We keep an extra hash for each conntrack, for fast searching. */
-static inline unsigned int
-hash_by_src(const struct ip_conntrack_tuple *tuple)
-{
- /* Original src, to ensure we map it consistently if poss. */
- return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all,
- tuple->dst.protonum, 0) % ip_nat_htable_size;
-}
-
-/* Noone using conntrack by the time this called. */
-static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
-{
- if (!(conn->status & IPS_NAT_DONE_MASK))
- return;
-
- write_lock_bh(&ip_nat_lock);
- list_del(&conn->nat.info.bysource);
- write_unlock_bh(&ip_nat_lock);
-}
-
-/* Is this tuple already taken? (not by us) */
-int
-ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- /* Conntrack tracking doesn't keep track of outgoing tuples; only
- incoming ones. NAT means they don't have a fixed mapping,
- so we invert the tuple and look for the incoming reply.
-
- We could keep a separate hash if this proves too slow. */
- struct ip_conntrack_tuple reply;
-
- invert_tuplepr(&reply, tuple);
- return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
-}
-EXPORT_SYMBOL(ip_nat_used_tuple);
-
-/* If we source map this tuple so reply looks like reply_tuple, will
- * that meet the constraints of range. */
-static int
-in_range(const struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range)
-{
- struct ip_nat_protocol *proto;
- int ret = 0;
-
- /* If we are supposed to map IPs, then we must be in the
- range specified, otherwise let this drag us onto a new src IP. */
- if (range->flags & IP_NAT_RANGE_MAP_IPS) {
- if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
- || ntohl(tuple->src.ip) > ntohl(range->max_ip))
- return 0;
- }
-
- rcu_read_lock();
- proto = __ip_nat_proto_find(tuple->dst.protonum);
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
- || proto->in_range(tuple, IP_NAT_MANIP_SRC,
- &range->min, &range->max))
- ret = 1;
- rcu_read_unlock();
-
- return ret;
-}
-
-static inline int
-same_src(const struct ip_conntrack *ct,
- const struct ip_conntrack_tuple *tuple)
-{
- return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
- == tuple->dst.protonum
- && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
- == tuple->src.ip
- && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
- == tuple->src.u.all);
-}
-
-/* Only called for SRC manip */
-static int
-find_appropriate_src(const struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_tuple *result,
- const struct ip_nat_range *range)
-{
- unsigned int h = hash_by_src(tuple);
- struct ip_conntrack *ct;
-
- read_lock_bh(&ip_nat_lock);
- list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
- if (same_src(ct, tuple)) {
- /* Copy source part from reply tuple. */
- invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- if (in_range(result, range)) {
- read_unlock_bh(&ip_nat_lock);
- return 1;
- }
- }
- }
- read_unlock_bh(&ip_nat_lock);
- return 0;
-}
-
-/* For [FUTURE] fragmentation handling, we want the least-used
- src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
- if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
- 1-65535, we don't do pro-rata allocation based on ports; we choose
- the ip with the lowest src-ip/dst-ip/proto usage.
-*/
-static void
-find_best_ips_proto(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- const struct ip_conntrack *conntrack,
- enum ip_nat_manip_type maniptype)
-{
- __be32 *var_ipp;
- /* Host order */
- u_int32_t minip, maxip, j;
-
- /* No IP mapping? Do nothing. */
- if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
- return;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- var_ipp = &tuple->src.ip;
- else
- var_ipp = &tuple->dst.ip;
-
- /* Fast path: only one choice. */
- if (range->min_ip == range->max_ip) {
- *var_ipp = range->min_ip;
- return;
- }
-
- /* Hashing source and destination IPs gives a fairly even
- * spread in practice (if there are a small number of IPs
- * involved, there usually aren't that many connections
- * anyway). The consistency means that servers see the same
- * client coming from the same IP (some Internet Banking sites
- * like this), even across reboots. */
- minip = ntohl(range->min_ip);
- maxip = ntohl(range->max_ip);
- j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0);
- *var_ipp = htonl(minip + j % (maxip - minip + 1));
-}
-
-/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING,
- * we change the source to map into the range. For NF_IP_PRE_ROUTING
- * and NF_IP_LOCAL_OUT, we change the destination to map into the
- * range. It might not be possible to get a unique tuple, but we try.
- * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
-static void
-get_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig_tuple,
- const struct ip_nat_range *range,
- struct ip_conntrack *conntrack,
- enum ip_nat_manip_type maniptype)
-{
- struct ip_nat_protocol *proto;
-
- /* 1) If this srcip/proto/src-proto-part is currently mapped,
- and that same mapping gives a unique tuple within the given
- range, use that.
-
- This is only required for source (ie. NAT/masq) mappings.
- So far, we don't do local source mappings, so multiple
- manips not an issue. */
- if (maniptype == IP_NAT_MANIP_SRC) {
- if (find_appropriate_src(orig_tuple, tuple, range)) {
- DEBUGP("get_unique_tuple: Found current src map\n");
- if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
- if (!ip_nat_used_tuple(tuple, conntrack))
- return;
- }
- }
-
- /* 2) Select the least-used IP/proto combination in the given
- range. */
- *tuple = *orig_tuple;
- find_best_ips_proto(tuple, range, conntrack, maniptype);
-
- /* 3) The per-protocol part of the manip is made to map into
- the range to make a unique tuple. */
-
- rcu_read_lock();
- proto = __ip_nat_proto_find(orig_tuple->dst.protonum);
-
- /* Change protocol info to have some randomization */
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
- proto->unique_tuple(tuple, range, maniptype, conntrack);
- goto out;
- }
-
- /* Only bother mapping if it's not already in range and unique */
- if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
- || proto->in_range(tuple, maniptype, &range->min, &range->max))
- && !ip_nat_used_tuple(tuple, conntrack))
- goto out;
-
- /* Last change: get protocol to try to obtain unique tuple. */
- proto->unique_tuple(tuple, range, maniptype, conntrack);
-out:
- rcu_read_unlock();
-}
-
-unsigned int
-ip_nat_setup_info(struct ip_conntrack *conntrack,
- const struct ip_nat_range *range,
- unsigned int hooknum)
-{
- struct ip_conntrack_tuple curr_tuple, new_tuple;
- struct ip_nat_info *info = &conntrack->nat.info;
- int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
- enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
- || hooknum == NF_IP_POST_ROUTING
- || hooknum == NF_IP_LOCAL_IN
- || hooknum == NF_IP_LOCAL_OUT);
- BUG_ON(ip_nat_initialized(conntrack, maniptype));
-
- /* What we've got will look like inverse of reply. Normally
- this is what is in the conntrack, except for prior
- manipulations (future optimization: if num_manips == 0,
- orig_tp =
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
- invert_tuplepr(&curr_tuple,
- &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
-
- if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
- struct ip_conntrack_tuple reply;
-
- /* Alter conntrack table so will recognize replies. */
- invert_tuplepr(&reply, &new_tuple);
- ip_conntrack_alter_reply(conntrack, &reply);
-
- /* Non-atomic: we own this at the moment. */
- if (maniptype == IP_NAT_MANIP_SRC)
- conntrack->status |= IPS_SRC_NAT;
- else
- conntrack->status |= IPS_DST_NAT;
- }
-
- /* Place in source hash if this is the first time. */
- if (have_to_hash) {
- unsigned int srchash
- = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple);
- write_lock_bh(&ip_nat_lock);
- list_add(&info->bysource, &bysource[srchash]);
- write_unlock_bh(&ip_nat_lock);
- }
-
- /* It's done. */
- if (maniptype == IP_NAT_MANIP_DST)
- set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
- else
- set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
-
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL(ip_nat_setup_info);
-
-/* Returns true if succeeded. */
-static int
-manip_pkt(u_int16_t proto,
- struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *target,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph;
- struct ip_nat_protocol *p;
-
- if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
- return 0;
-
- iph = (void *)(*pskb)->data + iphdroff;
-
- /* Manipulate protcol part. */
-
- /* rcu_read_lock()ed by nf_hook_slow */
- p = __ip_nat_proto_find(proto);
- if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
- return 0;
-
- iph = (void *)(*pskb)->data + iphdroff;
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- nf_csum_replace4(&iph->check, iph->saddr, target->src.ip);
- iph->saddr = target->src.ip;
- } else {
- nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip);
- iph->daddr = target->dst.ip;
- }
- return 1;
-}
-
-/* Do packet manipulations according to ip_nat_setup_info. */
-unsigned int ip_nat_packet(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned long statusbit;
- enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
-
- if (mtype == IP_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply dir. */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- /* Non-atomic: these bits don't change. */
- if (ct->status & statusbit) {
- struct ip_conntrack_tuple target;
-
- /* We are aiming to look like inverse of other direction. */
- invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-
- if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL_GPL(ip_nat_packet);
-
-/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
-{
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } *inside;
- struct ip_conntrack_protocol *proto;
- struct ip_conntrack_tuple inner, target;
- int hdrlen = (*pskb)->nh.iph->ihl * 4;
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned long statusbit;
- enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
-
- if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
- return 0;
-
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-
- /* We're actually going to mangle it beyond trivial checksum
- adjustment, so make sure the current checksum is correct. */
- if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
- return 0;
-
- /* Must be RELATED */
- IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
- (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
-
- /* Redirects on non-null nats must be dropped, else they'll
- start talking to each other without our translation, and be
- confused... --RR */
- if (inside->icmp.type == ICMP_REDIRECT) {
- /* If NAT isn't finished, assume it and drop. */
- if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
- return 0;
-
- if (ct->status & IPS_NAT_MASK)
- return 0;
- }
-
- DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
- *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
-
- /* rcu_read_lock()ed by nf_hook_slow */
- proto = __ip_conntrack_proto_find(inside->ip.protocol);
- if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
- sizeof(struct icmphdr) + inside->ip.ihl*4,
- &inner, proto))
- return 0;
-
- /* Change inner back to look like incoming packet. We do the
- opposite manip on this hook to normal, because it might not
- pass all hooks (locally-generated ICMP). Consider incoming
- packet: PREROUTING (DST manip), routing produces ICMP, goes
- through POSTROUTING (which must correct the DST manip). */
- if (!manip_pkt(inside->ip.protocol, pskb,
- (*pskb)->nh.iph->ihl*4
- + sizeof(inside->icmp),
- &ct->tuplehash[!dir].tuple,
- !manip))
- return 0;
-
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- /* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
- inside->icmp.checksum = 0;
- inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen,
- 0));
- }
-
- /* Change outer to look the reply to an incoming packet
- * (proto 0 means don't invert per-proto part). */
- if (manip == IP_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply dir. */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- if (ct->status & statusbit) {
- invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(0, pskb, 0, &target, manip))
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
-
-/* Protocol registration. */
-int ip_nat_protocol_register(struct ip_nat_protocol *proto)
-{
- int ret = 0;
-
- write_lock_bh(&ip_nat_lock);
- if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
- ret = -EBUSY;
- goto out;
- }
- rcu_assign_pointer(ip_nat_protos[proto->protonum], proto);
- out:
- write_unlock_bh(&ip_nat_lock);
- return ret;
-}
-EXPORT_SYMBOL(ip_nat_protocol_register);
-
-/* Noone stores the protocol anywhere; simply delete it. */
-void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
-{
- write_lock_bh(&ip_nat_lock);
- rcu_assign_pointer(ip_nat_protos[proto->protonum],
- &ip_nat_unknown_protocol);
- write_unlock_bh(&ip_nat_lock);
- synchronize_rcu();
-}
-EXPORT_SYMBOL(ip_nat_protocol_unregister);
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-int
-ip_nat_port_range_to_nfattr(struct sk_buff *skb,
- const struct ip_nat_range *range)
-{
- NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
- &range->min.tcp.port);
- NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
- &range->max.tcp.port);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-int
-ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
-{
- int ret = 0;
-
- /* we have to return whether we actually parsed something or not */
-
- if (tb[CTA_PROTONAT_PORT_MIN-1]) {
- ret = 1;
- range->min.tcp.port =
- *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
- }
-
- if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
- if (ret)
- range->max.tcp.port = range->min.tcp.port;
- } else {
- ret = 1;
- range->max.tcp.port =
- *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
-EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
-#endif
-
-static int __init ip_nat_init(void)
-{
- size_t i;
-
- /* Leave them the same for the moment. */
- ip_nat_htable_size = ip_conntrack_htable_size;
-
- /* One vmalloc for both hash tables */
- bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
- if (!bysource)
- return -ENOMEM;
-
- /* Sew in builtin protocols. */
- write_lock_bh(&ip_nat_lock);
- for (i = 0; i < MAX_IP_NAT_PROTO; i++)
- rcu_assign_pointer(ip_nat_protos[i], &ip_nat_unknown_protocol);
- rcu_assign_pointer(ip_nat_protos[IPPROTO_TCP], &ip_nat_protocol_tcp);
- rcu_assign_pointer(ip_nat_protos[IPPROTO_UDP], &ip_nat_protocol_udp);
- rcu_assign_pointer(ip_nat_protos[IPPROTO_ICMP], &ip_nat_protocol_icmp);
- write_unlock_bh(&ip_nat_lock);
-
- for (i = 0; i < ip_nat_htable_size; i++) {
- INIT_LIST_HEAD(&bysource[i]);
- }
-
- /* FIXME: Man, this is a hack. <SIGH> */
- IP_NF_ASSERT(rcu_dereference(ip_conntrack_destroyed) == NULL);
- rcu_assign_pointer(ip_conntrack_destroyed, ip_nat_cleanup_conntrack);
-
- /* Initialize fake conntrack so that NAT will skip it */
- ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
- return 0;
-}
-
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct ip_conntrack *i, void *data)
-{
- memset(&i->nat, 0, sizeof(i->nat));
- i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
- return 0;
-}
-
-static void __exit ip_nat_cleanup(void)
-{
- ip_ct_iterate_cleanup(&clean_nat, NULL);
- rcu_assign_pointer(ip_conntrack_destroyed, NULL);
- synchronize_rcu();
- vfree(bysource);
-}
-
-MODULE_LICENSE("GPL");
-
-module_init(ip_nat_init);
-module_exit(ip_nat_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
deleted file mode 100644
index 32e01d8dffcb..000000000000
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/* FTP extension for TCP NAT alteration. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/moduleparam.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp NAT helper");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* FIXME: Time out? --RR */
-
-static int
-mangle_rfc959_packet(struct sk_buff **pskb,
- __be32 newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
-{
- char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
-
- sprintf(buffer, "%u,%u,%u,%u,%u,%u",
- NIPQUAD(newip), port>>8, port&0xFF);
-
- DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
- *seq += strlen(buffer) - matchlen;
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_eprt_packet(struct sk_buff **pskb,
- __be32 newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
-{
- char buffer[sizeof("|1|255.255.255.255|65535|")];
-
- sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
-
- DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
- *seq += strlen(buffer) - matchlen;
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_epsv_packet(struct sk_buff **pskb,
- __be32 newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
-{
- char buffer[sizeof("|||65535|")];
-
- sprintf(buffer, "|||%u|", port);
-
- DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
- *seq += strlen(buffer) - matchlen;
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
- unsigned int,
- unsigned int,
- struct ip_conntrack *,
- enum ip_conntrack_info,
- u32 *seq)
-= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
- [IP_CT_FTP_PASV] = mangle_rfc959_packet,
- [IP_CT_FTP_EPRT] = mangle_eprt_packet,
- [IP_CT_FTP_EPSV] = mangle_epsv_packet
-};
-
-/* So, this packet has hit the connection tracking matching code.
- Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_ftp(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- enum ip_ct_ftp_type type,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp,
- u32 *seq)
-{
- __be32 newip;
- u_int16_t port;
- int dir = CTINFO2DIR(ctinfo);
- struct ip_conntrack *ct = exp->master;
-
- DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
-
- /* Connection will come from wherever this packet goes, hence !dir */
- newip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = !dir;
-
- /* When you see the packet, we need to NAT it the same as the
- * this one. */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
- seq)) {
- ip_conntrack_unexpect_related(exp);
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-static void __exit ip_nat_ftp_fini(void)
-{
- rcu_assign_pointer(ip_nat_ftp_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init ip_nat_ftp_init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_ftp_hook));
- rcu_assign_pointer(ip_nat_ftp_hook, ip_nat_ftp);
- return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
- return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(ip_nat_ftp_init);
-module_exit(ip_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
deleted file mode 100644
index dc778cfef58b..000000000000
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ /dev/null
@@ -1,436 +0,0 @@
-/* ip_nat_helper.c - generic support functions for NAT helpers
- *
- * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
- * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
- * - add support for SACK adjustment
- * 14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- * - merge SACK support into newnat API
- * 16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
- * - make ip_nat_resize_packet more generic (TCP and UDP)
- * - add ip_nat_mangle_udp_packet
- */
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-
-#if 0
-#define DEBUGP printk
-#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
-#else
-#define DEBUGP(format, args...)
-#define DUMP_OFFSET(x)
-#endif
-
-static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
- int sizediff,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- int dir;
- struct ip_nat_seq *this_way, *other_way;
-
- DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
- (*skb)->len, new_size);
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &ct->nat.info.seq[dir];
- other_way = &ct->nat.info.seq[!dir];
-
- DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
- DUMP_OFFSET(this_way);
-
- spin_lock_bh(&ip_nat_seqofs_lock);
-
- /* SYN adjust. If it's uninitialized, or this is after last
- * correction, record it: we don't handle more than one
- * adjustment in the window, but do deal with common case of a
- * retransmit */
- if (this_way->offset_before == this_way->offset_after
- || before(this_way->correction_pos, seq)) {
- this_way->correction_pos = seq;
- this_way->offset_before = this_way->offset_after;
- this_way->offset_after += sizediff;
- }
- spin_unlock_bh(&ip_nat_seqofs_lock);
-
- DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
- DUMP_OFFSET(this_way);
-}
-
-/* Frobs data inside this packet, which is linear. */
-static void mangle_contents(struct sk_buff *skb,
- unsigned int dataoff,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- unsigned char *data;
-
- BUG_ON(skb_is_nonlinear(skb));
- data = (unsigned char *)skb->nh.iph + dataoff;
-
- /* move post-replacement */
- memmove(data + match_offset + rep_len,
- data + match_offset + match_len,
- skb->tail - (data + match_offset + match_len));
-
- /* insert data from buffer */
- memcpy(data + match_offset, rep_buffer, rep_len);
-
- /* update skb info */
- if (rep_len > match_len) {
- DEBUGP("ip_nat_mangle_packet: Extending packet by "
- "%u from %u bytes\n", rep_len - match_len,
- skb->len);
- skb_put(skb, rep_len - match_len);
- } else {
- DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
- "%u from %u bytes\n", match_len - rep_len,
- skb->len);
- __skb_trim(skb, skb->len + rep_len - match_len);
- }
-
- /* fix IP hdr checksum information */
- skb->nh.iph->tot_len = htons(skb->len);
- ip_send_check(skb->nh.iph);
-}
-
-/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
-{
- struct sk_buff *nskb;
-
- if ((*pskb)->len + extra > 65535)
- return 0;
-
- nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
- if (!nskb)
- return 0;
-
- /* Transfer socket to new skb. */
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- return 1;
-}
-
-/* Generic function for mangling variable-length address changes inside
- * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
- * command in FTP).
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * */
-int
-ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- struct iphdr *iph;
- struct tcphdr *tcph;
- int oldlen, datalen;
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return 0;
-
- if (rep_len > match_len
- && rep_len - match_len > skb_tailroom(*pskb)
- && !enlarge_skb(pskb, rep_len - match_len))
- return 0;
-
- SKB_LINEAR_ASSERT(*pskb);
-
- iph = (*pskb)->nh.iph;
- tcph = (void *)iph + iph->ihl*4;
-
- oldlen = (*pskb)->len - iph->ihl*4;
- mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
- match_offset, match_len, rep_buffer, rep_len);
-
- datalen = (*pskb)->len - iph->ihl*4;
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- tcph->check = 0;
- tcph->check = tcp_v4_check(datalen,
- iph->saddr, iph->daddr,
- csum_partial((char *)tcph,
- datalen, 0));
- } else
- nf_proto_csum_replace2(&tcph->check, *pskb,
- htons(oldlen), htons(datalen), 1);
-
- if (rep_len != match_len) {
- set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
- adjust_tcp_sequence(ntohl(tcph->seq),
- (int)rep_len - (int)match_len,
- ct, ctinfo);
- /* Tell TCP window tracking about seq change */
- ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
- }
- return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
-
-/* Generic function for mangling variable-length address changes inside
- * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
- * command in the Amanda protocol)
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
- * should be fairly easy to do.
- */
-int
-ip_nat_mangle_udp_packet(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- struct iphdr *iph;
- struct udphdr *udph;
- int datalen, oldlen;
-
- /* UDP helpers might accidentally mangle the wrong packet */
- iph = (*pskb)->nh.iph;
- if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
- match_offset + match_len)
- return 0;
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return 0;
-
- if (rep_len > match_len
- && rep_len - match_len > skb_tailroom(*pskb)
- && !enlarge_skb(pskb, rep_len - match_len))
- return 0;
-
- iph = (*pskb)->nh.iph;
- udph = (void *)iph + iph->ihl*4;
-
- oldlen = (*pskb)->len - iph->ihl*4;
- mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
- match_offset, match_len, rep_buffer, rep_len);
-
- /* update the length of the UDP packet */
- datalen = (*pskb)->len - iph->ihl*4;
- udph->len = htons(datalen);
-
- /* fix udp checksum if udp checksum was previously calculated */
- if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
- return 1;
-
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- udph->check = 0;
- udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
- datalen, IPPROTO_UDP,
- csum_partial((char *)udph,
- datalen, 0));
- if (!udph->check)
- udph->check = CSUM_MANGLED_0;
- } else
- nf_proto_csum_replace2(&udph->check, *pskb,
- htons(oldlen), htons(datalen), 1);
- return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
-
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
- struct tcphdr *tcph,
- unsigned int sackoff,
- unsigned int sackend,
- struct ip_nat_seq *natseq)
-{
- while (sackoff < sackend) {
- struct tcp_sack_block_wire *sack;
- __be32 new_start_seq, new_end_seq;
-
- sack = (void *)skb->data + sackoff;
- if (after(ntohl(sack->start_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_after);
- else
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_before);
-
- if (after(ntohl(sack->end_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_after);
- else
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_before);
-
- DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
- ntohl(sack->start_seq), new_start_seq,
- ntohl(sack->end_seq), new_end_seq);
-
- nf_proto_csum_replace4(&tcph->check, skb,
- sack->start_seq, new_start_seq, 0);
- nf_proto_csum_replace4(&tcph->check, skb,
- sack->end_seq, new_end_seq, 0);
- sack->start_seq = new_start_seq;
- sack->end_seq = new_end_seq;
- sackoff += sizeof(*sack);
- }
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-ip_nat_sack_adjust(struct sk_buff **pskb,
- struct tcphdr *tcph,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dir, optoff, optend;
-
- optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
- optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
-
- if (!skb_make_writable(pskb, optend))
- return 0;
-
- dir = CTINFO2DIR(ctinfo);
-
- while (optoff < optend) {
- /* Usually: option, length. */
- unsigned char *op = (*pskb)->data + optoff;
-
- switch (op[0]) {
- case TCPOPT_EOL:
- return 1;
- case TCPOPT_NOP:
- optoff++;
- continue;
- default:
- /* no partial options */
- if (optoff + 1 == optend
- || optoff + op[1] > optend
- || op[1] < 2)
- return 0;
- if (op[0] == TCPOPT_SACK
- && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
- && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
- sack_adjust(*pskb, tcph, optoff+2,
- optoff+op[1],
- &ct->nat.info.seq[!dir]);
- optoff += op[1];
- }
- }
- return 1;
-}
-
-/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
-int
-ip_nat_seq_adjust(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct tcphdr *tcph;
- int dir;
- __be32 newseq, newack;
- struct ip_nat_seq *this_way, *other_way;
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &ct->nat.info.seq[dir];
- other_way = &ct->nat.info.seq[!dir];
-
- if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
- return 0;
-
- tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
- if (after(ntohl(tcph->seq), this_way->correction_pos))
- newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
- else
- newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
-
- if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
- newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
- else
- newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
-
- nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
- nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
-
- DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
- ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
- ntohl(newack));
-
- tcph->seq = newseq;
- tcph->ack_seq = newack;
-
- if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo))
- return 0;
-
- ip_conntrack_tcp_update(*pskb, ct, dir);
-
- return 1;
-}
-EXPORT_SYMBOL(ip_nat_seq_adjust);
-
-/* Setup NAT on this expected conntrack so it follows master. */
-/* If we fail to get a free NAT slot, we'll get dropped on confirm */
-void ip_nat_follow_master(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp)
-{
- struct ip_nat_range range;
-
- /* This must be a fresh one. */
- BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = exp->saved_proto;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.ip;
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
deleted file mode 100644
index bdc99ef6159e..000000000000
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ /dev/null
@@ -1,611 +0,0 @@
-/*
- * H.323 extension for NAT alteration.
- *
- * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
- *
- * This source code is licensed under General Public License version 2.
- *
- * Based on the 'brute force' H.323 NAT module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/moduleparam.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/****************************************************************************/
-static int set_addr(struct sk_buff **pskb,
- unsigned char **data, int dataoff,
- unsigned int addroff, __be32 ip, u_int16_t port)
-{
- enum ip_conntrack_info ctinfo;
- struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo);
- struct {
- __be32 ip;
- __be16 port;
- } __attribute__ ((__packed__)) buf;
- struct tcphdr _tcph, *th;
-
- buf.ip = ip;
- buf.port = htons(port);
- addroff += dataoff;
-
- if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
- if (!ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- addroff, sizeof(buf),
- (char *) &buf, sizeof(buf))) {
- if (net_ratelimit())
- printk("ip_nat_h323: ip_nat_mangle_tcp_packet"
- " error\n");
- return -1;
- }
-
- /* Relocate data pointer */
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return -1;
- *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
- th->doff * 4 + dataoff;
- } else {
- if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- addroff, sizeof(buf),
- (char *) &buf, sizeof(buf))) {
- if (net_ratelimit())
- printk("ip_nat_h323: ip_nat_mangle_udp_packet"
- " error\n");
- return -1;
- }
- /* ip_nat_mangle_udp_packet uses skb_make_writable() to copy
- * or pull everything in a linear buffer, so we can safely
- * use the skb pointers now */
- *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
- sizeof(struct udphdr);
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int set_h225_addr(struct sk_buff **pskb,
- unsigned char **data, int dataoff,
- TransportAddress * addr,
- __be32 ip, u_int16_t port)
-{
- return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port);
-}
-
-/****************************************************************************/
-static int set_h245_addr(struct sk_buff **pskb,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr,
- __be32 ip, u_int16_t port)
-{
- return set_addr(pskb, data, dataoff,
- addr->unicastAddress.iPAddress.network, ip, port);
-}
-
-/****************************************************************************/
-static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int i;
- __be32 ip;
- u_int16_t port;
-
- for (i = 0; i < count; i++) {
- if (get_h225_addr(*data, &addr[i], &ip, &port)) {
- if (ip == ct->tuplehash[dir].tuple.src.ip &&
- port == info->sig_port[dir]) {
- /* GW->GK */
-
- /* Fix for Gnomemeeting */
- if (i > 0 &&
- get_h225_addr(*data, &addr[0],
- &ip, &port) &&
- (ntohl(ip) & 0xff000000) == 0x7f000000)
- i = 0;
-
- DEBUGP
- ("ip_nat_ras: set signal address "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.dst.
- ip), info->sig_port[!dir]);
- return set_h225_addr(pskb, data, 0, &addr[i],
- ct->tuplehash[!dir].
- tuple.dst.ip,
- info->sig_port[!dir]);
- } else if (ip == ct->tuplehash[dir].tuple.dst.ip &&
- port == info->sig_port[dir]) {
- /* GK->GW */
- DEBUGP
- ("ip_nat_ras: set signal address "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.src.
- ip), info->sig_port[!dir]);
- return set_h225_addr(pskb, data, 0, &addr[i],
- ct->tuplehash[!dir].
- tuple.src.ip,
- info->sig_port[!dir]);
- }
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count)
-{
- int dir = CTINFO2DIR(ctinfo);
- int i;
- __be32 ip;
- u_int16_t port;
-
- for (i = 0; i < count; i++) {
- if (get_h225_addr(*data, &addr[i], &ip, &port) &&
- ip == ct->tuplehash[dir].tuple.src.ip &&
- port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port)) {
- DEBUGP("ip_nat_ras: set rasAddress "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
- ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.
- port));
- return set_h225_addr(pskb, data, 0, &addr[i],
- ct->tuplehash[!dir].tuple.dst.ip,
- ntohs(ct->tuplehash[!dir].tuple.
- dst.u.udp.port));
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int nat_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr,
- u_int16_t port, u_int16_t rtp_port,
- struct ip_conntrack_expect *rtp_exp,
- struct ip_conntrack_expect *rtcp_exp)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int i;
- u_int16_t nated_port;
-
- /* Set expectations for NAT */
- rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
- rtp_exp->expectfn = ip_nat_follow_master;
- rtp_exp->dir = !dir;
- rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
- rtcp_exp->expectfn = ip_nat_follow_master;
- rtcp_exp->dir = !dir;
-
- /* Lookup existing expects */
- for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
- if (info->rtp_port[i][dir] == rtp_port) {
- /* Expected */
-
- /* Use allocated ports first. This will refresh
- * the expects */
- rtp_exp->tuple.dst.u.udp.port =
- htons(info->rtp_port[i][dir]);
- rtcp_exp->tuple.dst.u.udp.port =
- htons(info->rtp_port[i][dir] + 1);
- break;
- } else if (info->rtp_port[i][dir] == 0) {
- /* Not expected */
- break;
- }
- }
-
- /* Run out of expectations */
- if (i >= H323_RTP_CHANNEL_MAX) {
- if (net_ratelimit())
- printk("ip_nat_h323: out of expectations\n");
- return 0;
- }
-
- /* Try to get a pair of ports. */
- for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
- nated_port != 0; nated_port += 2) {
- rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
- if (ip_conntrack_expect_related(rtp_exp) == 0) {
- rtcp_exp->tuple.dst.u.udp.port =
- htons(nated_port + 1);
- if (ip_conntrack_expect_related(rtcp_exp) == 0)
- break;
- ip_conntrack_unexpect_related(rtp_exp);
- }
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_h323: out of RTP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (set_h245_addr(pskb, data, dataoff, addr,
- ct->tuplehash[!dir].tuple.dst.ip,
- (port & 1) ? nated_port + 1 : nated_port) == 0) {
- /* Save ports */
- info->rtp_port[i][dir] = rtp_port;
- info->rtp_port[i][!dir] = nated_port;
- } else {
- ip_conntrack_unexpect_related(rtp_exp);
- ip_conntrack_unexpect_related(rtcp_exp);
- return -1;
- }
-
- /* Success */
- DEBUGP("ip_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(rtp_exp->tuple.src.ip),
- ntohs(rtp_exp->tuple.src.u.udp.port),
- NIPQUAD(rtp_exp->tuple.dst.ip),
- ntohs(rtp_exp->tuple.dst.u.udp.port));
- DEBUGP("ip_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(rtcp_exp->tuple.src.ip),
- ntohs(rtcp_exp->tuple.src.u.udp.port),
- NIPQUAD(rtcp_exp->tuple.dst.ip),
- ntohs(rtcp_exp->tuple.dst.u.udp.port));
-
- return 0;
-}
-
-/****************************************************************************/
-static int nat_t120(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect *exp)
-{
- int dir = CTINFO2DIR(ctinfo);
- u_int16_t nated_port = port;
-
- /* Set expectations for NAT */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->expectfn = ip_nat_follow_master;
- exp->dir = !dir;
-
- /* Try to get same port: if not, try to change it. */
- for (; nated_port != 0; nated_port++) {
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_h323: out of TCP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (set_h245_addr(pskb, data, dataoff, addr,
- ct->tuplehash[!dir].tuple.dst.ip, nated_port) < 0) {
- ip_conntrack_unexpect_related(exp);
- return -1;
- }
-
- DEBUGP("ip_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
- return 0;
-}
-
-/****************************************************************************
- * This conntrack expect function replaces ip_conntrack_h245_expect()
- * which was set by ip_conntrack_helper_h323.c. It calls both
- * ip_nat_follow_master() and ip_conntrack_h245_expect()
- ****************************************************************************/
-static void ip_nat_h245_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- ip_nat_follow_master(new, this);
- ip_conntrack_h245_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect *exp)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- u_int16_t nated_port = port;
-
- /* Set expectations for NAT */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->expectfn = ip_nat_h245_expect;
- exp->dir = !dir;
-
- /* Check existing expects */
- if (info->sig_port[dir] == port)
- nated_port = info->sig_port[!dir];
-
- /* Try to get same port: if not, try to change it. */
- for (; nated_port != 0; nated_port++) {
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_q931: out of TCP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (set_h225_addr(pskb, data, dataoff, addr,
- ct->tuplehash[!dir].tuple.dst.ip,
- nated_port) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = nated_port;
- } else {
- ip_conntrack_unexpect_related(exp);
- return -1;
- }
-
- DEBUGP("ip_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
- return 0;
-}
-
-/****************************************************************************
- * This conntrack expect function replaces ip_conntrack_q931_expect()
- * which was set by ip_conntrack_helper_h323.c.
- ****************************************************************************/
-static void ip_nat_q931_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- struct ip_nat_range range;
-
- if (this->tuple.src.ip != 0) { /* Only accept calls from GK */
- ip_nat_follow_master(new, this);
- goto out;
- }
-
- /* This must be a fresh one. */
- BUG_ON(new->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
-
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = this->saved_proto;
- range.min_ip = range.max_ip =
- new->master->tuplehash[!this->dir].tuple.src.ip;
-
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
-
- out:
- ip_conntrack_q931_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, TransportAddress * addr, int idx,
- u_int16_t port, struct ip_conntrack_expect *exp)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- u_int16_t nated_port = port;
- __be32 ip;
-
- /* Set expectations for NAT */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->expectfn = ip_nat_q931_expect;
- exp->dir = !dir;
-
- /* Check existing expects */
- if (info->sig_port[dir] == port)
- nated_port = info->sig_port[!dir];
-
- /* Try to get same port: if not, try to change it. */
- for (; nated_port != 0; nated_port++) {
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_ras: out of TCP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (set_h225_addr(pskb, data, 0, &addr[idx],
- ct->tuplehash[!dir].tuple.dst.ip,
- nated_port) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = nated_port;
-
- /* Fix for Gnomemeeting */
- if (idx > 0 &&
- get_h225_addr(*data, &addr[0], &ip, &port) &&
- (ntohl(ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr_hook(pskb, data, 0, &addr[0],
- ct->tuplehash[!dir].tuple.dst.ip,
- info->sig_port[!dir]);
- }
- } else {
- ip_conntrack_unexpect_related(exp);
- return -1;
- }
-
- /* Success */
- DEBUGP("ip_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
- return 0;
-}
-
-/****************************************************************************/
-static void ip_nat_callforwarding_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- struct ip_nat_range range;
-
- /* This must be a fresh one. */
- BUG_ON(new->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
-
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = this->saved_proto;
- range.min_ip = range.max_ip = this->saved_ip;
-
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
-
- ip_conntrack_q931_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_callforwarding(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect *exp)
-{
- int dir = CTINFO2DIR(ctinfo);
- u_int16_t nated_port;
-
- /* Set expectations for NAT */
- exp->saved_ip = exp->tuple.dst.ip;
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->expectfn = ip_nat_callforwarding_expect;
- exp->dir = !dir;
-
- /* Try to get same port: if not, try to change it. */
- for (nated_port = port; nated_port != 0; nated_port++) {
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_q931: out of TCP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (!set_h225_addr(pskb, data, dataoff, addr,
- ct->tuplehash[!dir].tuple.dst.ip,
- nated_port) == 0) {
- ip_conntrack_unexpect_related(exp);
- return -1;
- }
-
- /* Success */
- DEBUGP("ip_nat_q931: expect Call Forwarding "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
- return 0;
-}
-
-/****************************************************************************/
-static int __init init(void)
-{
- BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL);
- BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL);
- BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL);
- BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL);
- BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL);
- BUG_ON(rcu_dereference(nat_t120_hook) != NULL);
- BUG_ON(rcu_dereference(nat_h245_hook) != NULL);
- BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL);
- BUG_ON(rcu_dereference(nat_q931_hook) != NULL);
-
- rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
- rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
- rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
- rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
- rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
- rcu_assign_pointer(nat_t120_hook, nat_t120);
- rcu_assign_pointer(nat_h245_hook, nat_h245);
- rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
- rcu_assign_pointer(nat_q931_hook, nat_q931);
-
- DEBUGP("ip_nat_h323: init success\n");
- return 0;
-}
-
-/****************************************************************************/
-static void __exit fini(void)
-{
- rcu_assign_pointer(set_h245_addr_hook, NULL);
- rcu_assign_pointer(set_h225_addr_hook, NULL);
- rcu_assign_pointer(set_sig_addr_hook, NULL);
- rcu_assign_pointer(set_ras_addr_hook, NULL);
- rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
- rcu_assign_pointer(nat_t120_hook, NULL);
- rcu_assign_pointer(nat_h245_hook, NULL);
- rcu_assign_pointer(nat_callforwarding_hook, NULL);
- rcu_assign_pointer(nat_q931_hook, NULL);
- synchronize_rcu();
-}
-
-/****************************************************************************/
-module_init(init);
-module_exit(fini);
-
-MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
-MODULE_DESCRIPTION("H.323 NAT helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
deleted file mode 100644
index 24ce4a5023d7..000000000000
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * ip_nat_pptp.c - Version 3.0
- *
- * NAT support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft. PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702. Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * TODO: - NAT to a unique tuple, not to TCP source port
- * (needs netfilter tuple reservation)
- *
- * Changes:
- * 2002-02-10 - Version 1.3
- * - Use ip_nat_mangle_tcp_packet() because of cloned skb's
- * in local connections (Philip Craig <philipc@snapgear.com>)
- * - add checks for magicCookie and pptp version
- * - make argument list of pptp_{out,in}bound_packet() shorter
- * - move to C99 style initializers
- * - print version number at module loadtime
- * 2003-09-22 - Version 1.5
- * - use SNATed tcp sourceport as callid, since we get called before
- * TCP header is mangled (Philip Craig <philipc@snapgear.com>)
- * 2004-10-22 - Version 2.0
- * - kernel 2.6.x version
- * 2005-06-10 - Version 3.0
- * - kernel >= 2.6.11 version,
- * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_pptp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_NAT_PPTP_VERSION "3.0"
-
-#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off)))
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
-
-
-#if 0
-extern const char *pptp_msg_name[];
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
- __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static void pptp_nat_expected(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack *master = ct->master;
- struct ip_conntrack_expect *other_exp;
- struct ip_conntrack_tuple t;
- struct ip_ct_pptp_master *ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info;
- struct ip_nat_range range;
-
- ct_pptp_info = &master->help.ct_pptp_info;
- nat_pptp_info = &master->nat.help.nat_pptp_info;
-
- /* And here goes the grand finale of corrosion... */
-
- if (exp->dir == IP_CT_DIR_ORIGINAL) {
- DEBUGP("we are PNS->PAC\n");
- /* therefore, build tuple for PAC->PNS */
- t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
- t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
- t.dst.protonum = IPPROTO_GRE;
- } else {
- DEBUGP("we are PAC->PNS\n");
- /* build tuple for PNS->PAC */
- t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
- t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
- t.dst.protonum = IPPROTO_GRE;
- }
-
- DEBUGP("trying to unexpect other dir: ");
- DUMP_TUPLE(&t);
- other_exp = ip_conntrack_expect_find_get(&t);
- if (other_exp) {
- ip_conntrack_unexpect_related(other_exp);
- ip_conntrack_expect_put(other_exp);
- DEBUGP("success\n");
- } else {
- DEBUGP("not found!\n");
- }
-
- /* This must be a fresh one. */
- BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
- if (exp->dir == IP_CT_DIR_ORIGINAL) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
- }
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.ip;
- if (exp->dir == IP_CT_DIR_REPLY) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
- }
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-
-/* outbound packets == from PNS to PAC */
-static int
-pptp_outbound_pkt(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq)
-
-{
- struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg;
- __be16 new_callid;
- unsigned int cid_off;
-
- new_callid = ct_pptp_info->pns_call_id;
-
- switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
- /* FIXME: ideally we would want to reserve a call ID
- * here. current netfilter NAT core is not able to do
- * this :( For now we use TCP source port. This breaks
- * multiple calls within one control session */
-
- /* save original call ID in nat_info */
- nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
-
- /* don't use tcph->source since we are at a DSTmanip
- * hook (e.g. PREROUTING) and pkt is not mangled yet */
- new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
- /* save new call ID in ct info */
- ct_pptp_info->pns_call_id = new_callid;
- break;
- case PPTP_IN_CALL_REPLY:
- cid_off = offsetof(union pptp_ctrl_union, icack.callID);
- break;
- case PPTP_CALL_CLEAR_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
- break;
- default:
- DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
- (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0]);
- /* fall through */
-
- case PPTP_SET_LINK_INFO:
- /* only need to NAT in case PAC is behind NAT box */
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
- }
-
- /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
- * down to here */
- DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
-
- /* mangle packet */
- if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- cid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_callid), (char *)&new_callid,
- sizeof(new_callid)) == 0)
- return NF_DROP;
-
- return NF_ACCEPT;
-}
-
-static void
-pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
- struct ip_conntrack_expect *expect_reply)
-{
- struct ip_conntrack *ct = expect_orig->master;
- struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-
- /* save original PAC call ID in nat_info */
- nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
-
- /* alter expectation for PNS->PAC direction */
- expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
- expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
- expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
- expect_orig->dir = IP_CT_DIR_ORIGINAL;
-
- /* alter expectation for PAC->PNS direction */
- expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
- expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
- expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
- expect_reply->dir = IP_CT_DIR_REPLY;
-}
-
-/* inbound packets == from PAC to PNS */
-static int
-pptp_inbound_pkt(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq)
-{
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg;
- __be16 new_pcid;
- unsigned int pcid_off;
-
- new_pcid = nat_pptp_info->pns_call_id;
-
- switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REPLY:
- pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
- break;
- case PPTP_IN_CALL_CONNECT:
- pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
- break;
- case PPTP_IN_CALL_REQUEST:
- /* only need to nat in case PAC is behind NAT box */
- return NF_ACCEPT;
- case PPTP_WAN_ERROR_NOTIFY:
- pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
- break;
- case PPTP_CALL_DISCONNECT_NOTIFY:
- pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
- break;
- case PPTP_SET_LINK_INFO:
- pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
- break;
-
- default:
- DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0]);
- /* fall through */
-
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
- }
-
- /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
- * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
-
- /* mangle packet */
- DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
- ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
-
- if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- pcid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_pcid), (char *)&new_pcid,
- sizeof(new_pcid)) == 0)
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-
-extern int __init ip_nat_proto_gre_init(void);
-extern void __exit ip_nat_proto_gre_fini(void);
-
-static int __init ip_nat_helper_pptp_init(void)
-{
- int ret;
-
- DEBUGP("%s: registering NAT helper\n", __FILE__);
-
- ret = ip_nat_proto_gre_init();
- if (ret < 0)
- return ret;
-
- BUG_ON(rcu_dereference(ip_nat_pptp_hook_outbound));
- rcu_assign_pointer(ip_nat_pptp_hook_outbound, pptp_outbound_pkt);
-
- BUG_ON(rcu_dereference(ip_nat_pptp_hook_inbound));
- rcu_assign_pointer(ip_nat_pptp_hook_inbound, pptp_inbound_pkt);
-
- BUG_ON(rcu_dereference(ip_nat_pptp_hook_exp_gre));
- rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, pptp_exp_gre);
-
- BUG_ON(rcu_dereference(ip_nat_pptp_hook_expectfn));
- rcu_assign_pointer(ip_nat_pptp_hook_expectfn, pptp_nat_expected);
-
- printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
- return 0;
-}
-
-static void __exit ip_nat_helper_pptp_fini(void)
-{
- DEBUGP("cleanup_module\n" );
-
- rcu_assign_pointer(ip_nat_pptp_hook_expectfn, NULL);
- rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, NULL);
- rcu_assign_pointer(ip_nat_pptp_hook_inbound, NULL);
- rcu_assign_pointer(ip_nat_pptp_hook_outbound, NULL);
- synchronize_rcu();
-
- ip_nat_proto_gre_fini();
-
- printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
-}
-
-module_init(ip_nat_helper_pptp_init);
-module_exit(ip_nat_helper_pptp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
deleted file mode 100644
index cfaeea38314f..000000000000
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/* IRC extension for TCP NAT alteration.
- * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
- * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- * based on a copy of RR's ip_nat_ftp.c
- *
- * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/kernel.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/moduleparam.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("IRC (DCC) NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp)
-{
- u_int16_t port;
- unsigned int ret;
-
- /* "4294967296 65635 " */
- char buffer[18];
-
- DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
- expect->seq, exp_irc_info->len,
- ntohl(tcph->seq));
-
- /* Reply comes from server. */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = IP_CT_DIR_REPLY;
-
- /* When you see the packet, we need to NAT it the same as the
- * this one. */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
- * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
- * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
- * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
- * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
- * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
- * 255.255.255.255==4294967296, 10 digits)
- * P: bound port (min 1 d, max 5d (65635))
- * F: filename (min 1 d )
- * S: size (min 1 d )
- * 0x01, \n: terminators
- */
-
- /* AAA = "us", ie. where server normally talks to. */
- sprintf(buffer, "%u %u",
- ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip),
- port);
- DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
- buffer, NIPQUAD(exp->tuple.src.ip), port);
-
- ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
- matchoff, matchlen, buffer,
- strlen(buffer));
- if (ret != NF_ACCEPT)
- ip_conntrack_unexpect_related(exp);
- return ret;
-}
-
-static void __exit ip_nat_irc_fini(void)
-{
- rcu_assign_pointer(ip_nat_irc_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init ip_nat_irc_init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_irc_hook));
- rcu_assign_pointer(ip_nat_irc_hook, help);
- return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
- return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(ip_nat_irc_init);
-module_exit(ip_nat_irc_fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
deleted file mode 100644
index 95810202d849..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * ip_nat_proto_gre.c - Version 2.0
- *
- * NAT protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
- __FUNCTION__, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
-/* is key in given range between min and max */
-static int
-gre_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- __be16 key;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- key = tuple->src.u.gre.key;
- else
- key = tuple->dst.u.gre.key;
-
- return ntohs(key) >= ntohs(min->gre.key)
- && ntohs(key) <= ntohs(max->gre.key);
-}
-
-/* generate unique tuple ... */
-static int
-gre_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t key;
- __be16 *keyptr;
- unsigned int min, i, range_size;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- keyptr = &tuple->src.u.gre.key;
- else
- keyptr = &tuple->dst.u.gre.key;
-
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- DEBUGP("%p: NATing GRE PPTP\n", conntrack);
- min = 1;
- range_size = 0xffff;
- } else {
- min = ntohs(range->min.gre.key);
- range_size = ntohs(range->max.gre.key) - min + 1;
- }
-
- DEBUGP("min = %u, range_size = %u\n", min, range_size);
-
- for (i = 0; i < range_size; i++, key++) {
- *keyptr = htons(min + key % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
-
- DEBUGP("%p: no NAT mapping\n", conntrack);
-
- return 0;
-}
-
-/* manipulate a GRE packet according to maniptype */
-static int
-gre_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct gre_hdr *greh;
- struct gre_hdr_pptp *pgreh;
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- unsigned int hdroff = iphdroff + iph->ihl*4;
-
- /* pgreh includes two optional 32bit fields which are not required
- * to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
- return 0;
-
- greh = (void *)(*pskb)->data + hdroff;
- pgreh = (struct gre_hdr_pptp *) greh;
-
- /* we only have destination manip of a packet, since 'source key'
- * is not present in the packet itself */
- if (maniptype == IP_NAT_MANIP_DST) {
- /* key manipulation is always dest */
- switch (greh->version) {
- case 0:
- if (!greh->key) {
- DEBUGP("can't nat GRE w/o key\n");
- break;
- }
- if (greh->csum) {
- /* FIXME: Never tested this code... */
- nf_proto_csum_replace4(gre_csum(greh), *pskb,
- *(gre_key(greh)),
- tuple->dst.u.gre.key, 0);
- }
- *(gre_key(greh)) = tuple->dst.u.gre.key;
- break;
- case GRE_VERSION_PPTP:
- DEBUGP("call_id -> 0x%04x\n",
- ntohs(tuple->dst.u.gre.key));
- pgreh->call_id = tuple->dst.u.gre.key;
- break;
- default:
- DEBUGP("can't nat unknown GRE version\n");
- return 0;
- break;
- }
- }
- return 1;
-}
-
-/* nat helper struct */
-static struct ip_nat_protocol gre = {
- .name = "GRE",
- .protonum = IPPROTO_GRE,
- .manip_pkt = gre_manip_pkt,
- .in_range = gre_in_range,
- .unique_tuple = gre_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
-
-int __init ip_nat_proto_gre_init(void)
-{
- return ip_nat_protocol_register(&gre);
-}
-
-void __exit ip_nat_proto_gre_fini(void)
-{
- ip_nat_protocol_unregister(&gre);
-}
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
deleted file mode 100644
index 22a528ae0380..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-icmp_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
- ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static int
-icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t id;
- unsigned int range_size;
- unsigned int i;
-
- range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
- range_size = 0xFFFF;
-
- for (i = 0; i < range_size; i++, id++) {
- tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
- (id % range_size));
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
- return 0;
-}
-
-static int
-icmp_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- struct icmphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
-
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
- return 0;
-
- hdr = (struct icmphdr *)((*pskb)->data + hdroff);
- nf_proto_csum_replace2(&hdr->checksum, *pskb,
- hdr->un.echo.id, tuple->src.u.icmp.id, 0);
- hdr->un.echo.id = tuple->src.u.icmp.id;
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_icmp = {
- .name = "ICMP",
- .protonum = IPPROTO_ICMP,
- .me = THIS_MODULE,
- .manip_pkt = icmp_manip_pkt,
- .in_range = icmp_in_range,
- .unique_tuple = icmp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
deleted file mode 100644
index 14ff24f53a7a..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-
-static int
-tcp_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- __be16 port;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- port = tuple->src.u.tcp.port;
- else
- port = tuple->dst.u.tcp.port;
-
- return ntohs(port) >= ntohs(min->tcp.port)
- && ntohs(port) <= ntohs(max->tcp.port);
-}
-
-static int
-tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t port;
- __be16 *portptr;
- unsigned int range_size, min, i;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- portptr = &tuple->src.u.tcp.port;
- else
- portptr = &tuple->dst.u.tcp.port;
-
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
- return 0;
-
- /* Map privileged onto privileged. */
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr)<512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min.tcp.port);
- range_size = ntohs(range->max.tcp.port) - min + 1;
- }
-
- /* Start from random port to avoid prediction */
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
- port = net_random();
-
- for (i = 0; i < range_size; i++, port++) {
- *portptr = htons(min + port % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack)) {
- return 1;
- }
- }
- return 0;
-}
-
-static int
-tcp_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- struct tcphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
- __be16 *portptr, newport, oldport;
- int hdrsize = 8; /* TCP connection tracking guarantees this much */
-
- /* this could be a inner header returned in icmp packet; in such
- cases we cannot update the checksum field since it is outside of
- the 8 bytes of transport layer headers we are guaranteed */
- if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
- hdrsize = sizeof(struct tcphdr);
-
- if (!skb_make_writable(pskb, hdroff + hdrsize))
- return 0;
-
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct tcphdr *)((*pskb)->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.ip;
- newport = tuple->src.u.tcp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.ip;
- newport = tuple->dst.u.tcp.port;
- portptr = &hdr->dest;
- }
-
- oldport = *portptr;
- *portptr = newport;
-
- if (hdrsize < sizeof(*hdr))
- return 1;
-
- nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
- nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_tcp = {
- .name = "TCP",
- .protonum = IPPROTO_TCP,
- .me = THIS_MODULE,
- .manip_pkt = tcp_manip_pkt,
- .in_range = tcp_in_range,
- .unique_tuple = tcp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
deleted file mode 100644
index dfd521672891..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-udp_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- __be16 port;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- port = tuple->src.u.udp.port;
- else
- port = tuple->dst.u.udp.port;
-
- return ntohs(port) >= ntohs(min->udp.port)
- && ntohs(port) <= ntohs(max->udp.port);
-}
-
-static int
-udp_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t port;
- __be16 *portptr;
- unsigned int range_size, min, i;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- portptr = &tuple->src.u.udp.port;
- else
- portptr = &tuple->dst.u.udp.port;
-
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
- return 0;
-
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr)<512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min.udp.port);
- range_size = ntohs(range->max.udp.port) - min + 1;
- }
-
- /* Start from random port to avoid prediction */
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
- port = net_random();
-
- for (i = 0; i < range_size; i++, port++) {
- *portptr = htons(min + port % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
- return 0;
-}
-
-static int
-udp_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- struct udphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
- __be16 *portptr, newport;
-
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
- return 0;
-
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct udphdr *)((*pskb)->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.ip;
- newport = tuple->src.u.udp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.ip;
- newport = tuple->dst.u.udp.port;
- portptr = &hdr->dest;
- }
-
- if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
- nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
- nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, 0);
- if (!hdr->check)
- hdr->check = CSUM_MANGLED_0;
- }
- *portptr = newport;
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_udp = {
- .name = "UDP",
- .protonum = IPPROTO_UDP,
- .me = THIS_MODULE,
- .manip_pkt = udp_manip_pkt,
- .in_range = udp_in_range,
- .unique_tuple = udp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
deleted file mode 100644
index 3bf049517246..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* The "unknown" protocol. This is what is used for protocols we
- * don't understand. It's returned by ip_ct_find_proto().
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int unknown_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type manip_type,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- return 1;
-}
-
-static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- /* Sorry: we can't help you; if it's not unique, we can't frob
- anything. */
- return 0;
-}
-
-static int
-unknown_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_unknown_protocol = {
- .name = "unknown",
- /* .me isn't set: getting a ref to this cannot fail. */
- .manip_pkt = unknown_manip_pkt,
- .in_range = unknown_in_range,
- .unique_tuple = unknown_unique_tuple,
-};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
deleted file mode 100644
index 080eb1d92200..000000000000
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* Everything about the rules for NAT. */
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/bitops.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
-
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
-} nat_initial_table __initdata
-= { { "nat", NAT_VALID_HOOKS, 4,
- sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
- { [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
- { [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
- 0, NULL, { } },
- {
- /* PRE_ROUTING */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* POST_ROUTING */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* LOCAL_OUT */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } }
- },
- /* ERROR */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_error),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
- { } },
- "ERROR"
- }
- }
-};
-
-static struct xt_table nat_table = {
- .name = "nat",
- .valid_hooks = NAT_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
- .me = THIS_MODULE,
- .af = AF_INET,
-};
-
-/* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const struct xt_target *target,
- const void *targinfo)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- const struct ip_nat_multi_range_compat *mr = targinfo;
-
- IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- /* Connection must be valid and new. */
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
- || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
- IP_NF_ASSERT(out);
-
- return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
-{
- static int warned = 0;
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
- struct rtable *rt;
-
- if (ip_route_output_key(&rt, &fl) != 0)
- return;
-
- if (rt->rt_src != srcip && !warned) {
- printk("NAT: no longer support implicit source local NAT\n");
- printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
- NIPQUAD(srcip), NIPQUAD(dstip));
- warned = 1;
- }
- ip_rt_put(rt);
-}
-
-static unsigned int ipt_dnat_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const struct xt_target *target,
- const void *targinfo)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- const struct ip_nat_multi_range_compat *mr = targinfo;
-
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
- || hooknum == NF_IP_LOCAL_OUT);
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- /* Connection must be valid and new. */
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
- if (hooknum == NF_IP_LOCAL_OUT
- && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
- warn_if_extra_mangle((*pskb)->nh.iph->daddr,
- mr->range[0].min_ip);
-
- return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-static int ipt_snat_checkentry(const char *tablename,
- const void *entry,
- const struct xt_target *target,
- void *targinfo,
- unsigned int hook_mask)
-{
- struct ip_nat_multi_range_compat *mr = targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- printk("SNAT: multiple ranges no longer supported\n");
- return 0;
- }
- return 1;
-}
-
-static int ipt_dnat_checkentry(const char *tablename,
- const void *entry,
- const struct xt_target *target,
- void *targinfo,
- unsigned int hook_mask)
-{
- struct ip_nat_multi_range_compat *mr = targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- printk("DNAT: multiple ranges no longer supported\n");
- return 0;
- }
- if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) {
- printk("DNAT: port randomization not supported\n");
- return 0;
- }
- return 1;
-}
-
-inline unsigned int
-alloc_null_binding(struct ip_conntrack *conntrack,
- struct ip_nat_info *info,
- unsigned int hooknum)
-{
- /* Force range to this IP; let proto decide mapping for
- per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
- Use reply in case it's already been mangled (eg local packet).
- */
- __be32 ip
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
- : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- struct ip_nat_range range
- = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
-
- DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack,
- NIPQUAD(ip));
- return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-unsigned int
-alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
- struct ip_nat_info *info,
- unsigned int hooknum)
-{
- __be32 ip
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
- : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- u_int16_t all
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
- : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
- struct ip_nat_range range
- = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
-
- DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
- conntrack, NIPQUAD(ip));
- return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-int ip_nat_rule_find(struct sk_buff **pskb,
- unsigned int hooknum,
- const struct net_device *in,
- const struct net_device *out,
- struct ip_conntrack *ct,
- struct ip_nat_info *info)
-{
- int ret;
-
- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
-
- if (ret == NF_ACCEPT) {
- if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
- /* NUL mapping */
- ret = alloc_null_binding(ct, info, hooknum);
- }
- return ret;
-}
-
-static struct xt_target ipt_snat_reg = {
- .name = "SNAT",
- .family = AF_INET,
- .target = ipt_snat_target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
- .table = "nat",
- .hooks = 1 << NF_IP_POST_ROUTING,
- .checkentry = ipt_snat_checkentry,
-};
-
-static struct xt_target ipt_dnat_reg = {
- .name = "DNAT",
- .family = AF_INET,
- .target = ipt_dnat_target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
- .table = "nat",
- .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
- .checkentry = ipt_dnat_checkentry,
-};
-
-int __init ip_nat_rule_init(void)
-{
- int ret;
-
- ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
- if (ret != 0)
- return ret;
- ret = xt_register_target(&ipt_snat_reg);
- if (ret != 0)
- goto unregister_table;
-
- ret = xt_register_target(&ipt_dnat_reg);
- if (ret != 0)
- goto unregister_snat;
-
- return ret;
-
- unregister_snat:
- xt_unregister_target(&ipt_snat_reg);
- unregister_table:
- xt_unregister_table(&nat_table);
-
- return ret;
-}
-
-void ip_nat_rule_cleanup(void)
-{
- xt_unregister_target(&ipt_dnat_reg);
- xt_unregister_target(&ipt_snat_reg);
- ipt_unregister_table(&nat_table);
-}
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c
deleted file mode 100644
index 325c5a9dc2ef..000000000000
--- a/net/ipv4/netfilter/ip_nat_sip.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/* SIP extension for UDP NAT alteration.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_nat_ftp.c and other modules.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP NAT helper");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-struct addr_map {
- struct {
- char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- unsigned int srclen, srciplen;
- unsigned int dstlen, dstiplen;
- } addr[IP_CT_DIR_MAX];
-};
-
-static void addr_map_init(struct ip_conntrack *ct, struct addr_map *map)
-{
- struct ip_conntrack_tuple *t;
- enum ip_conntrack_dir dir;
- unsigned int n;
-
- for (dir = 0; dir < IP_CT_DIR_MAX; dir++) {
- t = &ct->tuplehash[dir].tuple;
-
- n = sprintf(map->addr[dir].src, "%u.%u.%u.%u",
- NIPQUAD(t->src.ip));
- map->addr[dir].srciplen = n;
- n += sprintf(map->addr[dir].src + n, ":%u",
- ntohs(t->src.u.udp.port));
- map->addr[dir].srclen = n;
-
- n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
- NIPQUAD(t->dst.ip));
- map->addr[dir].dstiplen = n;
- n += sprintf(map->addr[dir].dst + n, ":%u",
- ntohs(t->dst.u.udp.port));
- map->addr[dir].dstlen = n;
- }
-}
-
-static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct, const char **dptr, size_t dlen,
- enum sip_header_pos pos, struct addr_map *map)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned int matchlen, matchoff, addrlen;
- char *addr;
-
- if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
- return 1;
-
- if ((matchlen == map->addr[dir].srciplen ||
- matchlen == map->addr[dir].srclen) &&
- memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) {
- addr = map->addr[!dir].dst;
- addrlen = map->addr[!dir].dstlen;
- } else if ((matchlen == map->addr[dir].dstiplen ||
- matchlen == map->addr[dir].dstlen) &&
- memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) {
- addr = map->addr[!dir].src;
- addrlen = map->addr[!dir].srclen;
- } else
- return 1;
-
- if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- matchoff, matchlen, addr, addrlen))
- return 0;
- *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- return 1;
-
-}
-
-static unsigned int ip_nat_sip(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- const char **dptr)
-{
- enum sip_header_pos pos;
- struct addr_map map;
- int dataoff, datalen;
-
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- datalen = (*pskb)->len - dataoff;
- if (datalen < sizeof("SIP/2.0") - 1)
- return NF_DROP;
-
- addr_map_init(ct, &map);
-
- /* Basic rules: requests and responses. */
- if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) {
- /* 10.2: Constructing the REGISTER Request:
- *
- * The "userinfo" and "@" components of the SIP URI MUST NOT
- * be present.
- */
- if (datalen >= sizeof("REGISTER") - 1 &&
- strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0)
- pos = POS_REG_REQ_URI;
- else
- pos = POS_REQ_URI;
-
- if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
- return NF_DROP;
- }
-
- if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-static unsigned int mangle_sip_packet(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- const char **dptr, size_t dlen,
- char *buffer, int bufflen,
- enum sip_header_pos pos)
-{
- unsigned int matchlen, matchoff;
-
- if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
- return 0;
-
- if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- matchoff, matchlen, buffer, bufflen))
- return 0;
-
- /* We need to reload this. Thanks Patrick. */
- *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- return 1;
-}
-
-static int mangle_content_len(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- const char *dptr)
-{
- unsigned int dataoff, matchoff, matchlen;
- char buffer[sizeof("65536")];
- int bufflen;
-
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-
- /* Get actual SDP lenght */
- if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
- &matchlen, POS_SDP_HEADER) > 0) {
-
- /* since ct_sip_get_info() give us a pointer passing 'v='
- we need to add 2 bytes in this count. */
- int c_len = (*pskb)->len - dataoff - matchoff + 2;
-
- /* Now, update SDP lenght */
- if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
- &matchlen, POS_CONTENT) > 0) {
-
- bufflen = sprintf(buffer, "%u", c_len);
-
- return ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- matchoff, matchlen,
- buffer, bufflen);
- }
- }
- return 0;
-}
-
-static unsigned int mangle_sdp(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- __be32 newip, u_int16_t port,
- const char *dptr)
-{
- char buffer[sizeof("nnn.nnn.nnn.nnn")];
- unsigned int dataoff, bufflen;
-
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-
- /* Mangle owner and contact info. */
- bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
- buffer, bufflen, POS_OWNER))
- return 0;
-
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
- buffer, bufflen, POS_CONNECTION))
- return 0;
-
- /* Mangle media port. */
- bufflen = sprintf(buffer, "%u", port);
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
- buffer, bufflen, POS_MEDIA))
- return 0;
-
- return mangle_content_len(pskb, ctinfo, ct, dptr);
-}
-
-/* So, this packet has hit the connection tracking matching code.
- Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp,
- const char *dptr)
-{
- struct ip_conntrack *ct = exp->master;
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- __be32 newip;
- u_int16_t port;
-
- DEBUGP("ip_nat_sdp():\n");
-
- /* Connection will come from reply */
- newip = ct->tuplehash[!dir].tuple.dst.ip;
-
- exp->tuple.dst.ip = newip;
- exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
- exp->dir = !dir;
-
- /* When you see the packet, we need to NAT it the same as the
- this one. */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) {
- exp->tuple.dst.u.udp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
- ip_conntrack_unexpect_related(exp);
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-static void __exit fini(void)
-{
- rcu_assign_pointer(ip_nat_sip_hook, NULL);
- rcu_assign_pointer(ip_nat_sdp_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_sip_hook));
- BUG_ON(rcu_dereference(ip_nat_sdp_hook));
- rcu_assign_pointer(ip_nat_sip_hook, ip_nat_sip);
- rcu_assign_pointer(ip_nat_sdp_hook, ip_nat_sdp);
- return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
deleted file mode 100644
index e41d0efae515..000000000000
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ /dev/null
@@ -1,1333 +0,0 @@
-/*
- * ip_nat_snmp_basic.c
- *
- * Basic SNMP Application Layer Gateway
- *
- * This IP NAT module is intended for use with SNMP network
- * discovery and monitoring applications where target networks use
- * conflicting private address realms.
- *
- * Static NAT is used to remap the networks from the view of the network
- * management system at the IP layer, and this module remaps some application
- * layer addresses to match.
- *
- * The simplest form of ALG is performed, where only tagged IP addresses
- * are modified. The module does not need to be MIB aware and only scans
- * messages at the ASN.1/BER level.
- *
- * Currently, only SNMPv1 and SNMPv2 are supported.
- *
- * More information on ALG and associated issues can be found in
- * RFC 2962
- *
- * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
- * McLean & Jochen Friedrich, stripped down for use in the kernel.
- *
- * Copyright (c) 2000 RP Internet (www.rpi.net.au).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Author: James Morris <jmorris@intercode.com.au>
- *
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
- */
-#include <linux/in.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/moduleparam.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <net/udp.h>
-#include <asm/uaccess.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
-
-#define SNMP_PORT 161
-#define SNMP_TRAP_PORT 162
-#define NOCT1(n) (*(u8 *)n)
-
-static int debug;
-static DEFINE_SPINLOCK(snmp_lock);
-
-/*
- * Application layer address mapping mimics the NAT mapping, but
- * only for the first octet in this case (a more flexible system
- * can be implemented if needed).
- */
-struct oct1_map
-{
- u_int8_t from;
- u_int8_t to;
-};
-
-
-/*****************************************************************************
- *
- * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* Class */
-#define ASN1_UNI 0 /* Universal */
-#define ASN1_APL 1 /* Application */
-#define ASN1_CTX 2 /* Context */
-#define ASN1_PRV 3 /* Private */
-
-/* Tag */
-#define ASN1_EOC 0 /* End Of Contents */
-#define ASN1_BOL 1 /* Boolean */
-#define ASN1_INT 2 /* Integer */
-#define ASN1_BTS 3 /* Bit String */
-#define ASN1_OTS 4 /* Octet String */
-#define ASN1_NUL 5 /* Null */
-#define ASN1_OJI 6 /* Object Identifier */
-#define ASN1_OJD 7 /* Object Description */
-#define ASN1_EXT 8 /* External */
-#define ASN1_SEQ 16 /* Sequence */
-#define ASN1_SET 17 /* Set */
-#define ASN1_NUMSTR 18 /* Numerical String */
-#define ASN1_PRNSTR 19 /* Printable String */
-#define ASN1_TEXSTR 20 /* Teletext String */
-#define ASN1_VIDSTR 21 /* Video String */
-#define ASN1_IA5STR 22 /* IA5 String */
-#define ASN1_UNITIM 23 /* Universal Time */
-#define ASN1_GENTIM 24 /* General Time */
-#define ASN1_GRASTR 25 /* Graphical String */
-#define ASN1_VISSTR 26 /* Visible String */
-#define ASN1_GENSTR 27 /* General String */
-
-/* Primitive / Constructed methods*/
-#define ASN1_PRI 0 /* Primitive */
-#define ASN1_CON 1 /* Constructed */
-
-/*
- * Error codes.
- */
-#define ASN1_ERR_NOERROR 0
-#define ASN1_ERR_DEC_EMPTY 2
-#define ASN1_ERR_DEC_EOC_MISMATCH 3
-#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
-#define ASN1_ERR_DEC_BADVALUE 5
-
-/*
- * ASN.1 context.
- */
-struct asn1_ctx
-{
- int error; /* Error condition */
- unsigned char *pointer; /* Octet just to be decoded */
- unsigned char *begin; /* First octet */
- unsigned char *end; /* Octet after last octet */
-};
-
-/*
- * Octet string (not null terminated)
- */
-struct asn1_octstr
-{
- unsigned char *data;
- unsigned int len;
-};
-
-static void asn1_open(struct asn1_ctx *ctx,
- unsigned char *buf,
- unsigned int len)
-{
- ctx->begin = buf;
- ctx->end = buf + len;
- ctx->pointer = buf;
- ctx->error = ASN1_ERR_NOERROR;
-}
-
-static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
-{
- if (ctx->pointer >= ctx->end) {
- ctx->error = ASN1_ERR_DEC_EMPTY;
- return 0;
- }
- *ch = *(ctx->pointer)++;
- return 1;
-}
-
-static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
-{
- unsigned char ch;
-
- *tag = 0;
-
- do
- {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
- *tag <<= 7;
- *tag |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
- return 1;
-}
-
-static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
-{
- unsigned char ch;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *cls = (ch & 0xC0) >> 6;
- *con = (ch & 0x20) >> 5;
- *tag = (ch & 0x1F);
-
- if (*tag == 0x1F) {
- if (!asn1_tag_decode(ctx, tag))
- return 0;
- }
- return 1;
-}
-
-static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
- unsigned int *def,
- unsigned int *len)
-{
- unsigned char ch, cnt;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch == 0x80)
- *def = 0;
- else {
- *def = 1;
-
- if (ch < 0x80)
- *len = ch;
- else {
- cnt = (unsigned char) (ch & 0x7F);
- *len = 0;
-
- while (cnt > 0) {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
- *len <<= 8;
- *len |= ch;
- cnt--;
- }
- }
- }
- return 1;
-}
-
-static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
- unsigned char **eoc,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
-{
- unsigned int def, len;
-
- if (!asn1_id_decode(ctx, cls, con, tag))
- return 0;
-
- def = len = 0;
- if (!asn1_length_decode(ctx, &def, &len))
- return 0;
-
- if (def)
- *eoc = ctx->pointer + len;
- else
- *eoc = NULL;
- return 1;
-}
-
-static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
- unsigned char ch;
-
- if (eoc == 0) {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch != 0x00) {
- ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch != 0x00) {
- ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
- return 0;
- }
- return 1;
- } else {
- if (ctx->pointer != eoc) {
- ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
- return 0;
- }
- return 1;
- }
-}
-
-static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
- ctx->pointer = eoc;
- return 1;
-}
-
-static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- long *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = (signed char) ch;
- len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (long)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned int *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = ch;
- if (ch == 0) len = 0;
- else len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (unsigned int)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = ch;
- if (ch == 0) len = 0;
- else len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (unsigned long)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned char **octets,
- unsigned int *len)
-{
- unsigned char *ptr;
-
- *len = 0;
-
- *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
- if (*octets == NULL) {
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
-
- ptr = *octets;
- while (ctx->pointer < eoc) {
- if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
- kfree(*octets);
- *octets = NULL;
- return 0;
- }
- (*len)++;
- }
- return 1;
-}
-
-static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
- unsigned long *subid)
-{
- unsigned char ch;
-
- *subid = 0;
-
- do {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *subid <<= 7;
- *subid |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
- return 1;
-}
-
-static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long **oid,
- unsigned int *len)
-{
- unsigned long subid;
- unsigned int size;
- unsigned long *optr;
-
- size = eoc - ctx->pointer + 1;
- *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
- if (*oid == NULL) {
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
-
- optr = *oid;
-
- if (!asn1_subid_decode(ctx, &subid)) {
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
-
- if (subid < 40) {
- optr [0] = 0;
- optr [1] = subid;
- } else if (subid < 80) {
- optr [0] = 1;
- optr [1] = subid - 40;
- } else {
- optr [0] = 2;
- optr [1] = subid - 80;
- }
-
- *len = 2;
- optr += 2;
-
- while (ctx->pointer < eoc) {
- if (++(*len) > size) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
-
- if (!asn1_subid_decode(ctx, optr++)) {
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
- }
- return 1;
-}
-
-/*****************************************************************************
- *
- * SNMP decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* SNMP Versions */
-#define SNMP_V1 0
-#define SNMP_V2C 1
-#define SNMP_V2 2
-#define SNMP_V3 3
-
-/* Default Sizes */
-#define SNMP_SIZE_COMM 256
-#define SNMP_SIZE_OBJECTID 128
-#define SNMP_SIZE_BUFCHR 256
-#define SNMP_SIZE_BUFINT 128
-#define SNMP_SIZE_SMALLOBJECTID 16
-
-/* Requests */
-#define SNMP_PDU_GET 0
-#define SNMP_PDU_NEXT 1
-#define SNMP_PDU_RESPONSE 2
-#define SNMP_PDU_SET 3
-#define SNMP_PDU_TRAP1 4
-#define SNMP_PDU_BULK 5
-#define SNMP_PDU_INFORM 6
-#define SNMP_PDU_TRAP2 7
-
-/* Errors */
-#define SNMP_NOERROR 0
-#define SNMP_TOOBIG 1
-#define SNMP_NOSUCHNAME 2
-#define SNMP_BADVALUE 3
-#define SNMP_READONLY 4
-#define SNMP_GENERROR 5
-#define SNMP_NOACCESS 6
-#define SNMP_WRONGTYPE 7
-#define SNMP_WRONGLENGTH 8
-#define SNMP_WRONGENCODING 9
-#define SNMP_WRONGVALUE 10
-#define SNMP_NOCREATION 11
-#define SNMP_INCONSISTENTVALUE 12
-#define SNMP_RESOURCEUNAVAILABLE 13
-#define SNMP_COMMITFAILED 14
-#define SNMP_UNDOFAILED 15
-#define SNMP_AUTHORIZATIONERROR 16
-#define SNMP_NOTWRITABLE 17
-#define SNMP_INCONSISTENTNAME 18
-
-/* General SNMP V1 Traps */
-#define SNMP_TRAP_COLDSTART 0
-#define SNMP_TRAP_WARMSTART 1
-#define SNMP_TRAP_LINKDOWN 2
-#define SNMP_TRAP_LINKUP 3
-#define SNMP_TRAP_AUTFAILURE 4
-#define SNMP_TRAP_EQPNEIGHBORLOSS 5
-#define SNMP_TRAP_ENTSPECIFIC 6
-
-/* SNMPv1 Types */
-#define SNMP_NULL 0
-#define SNMP_INTEGER 1 /* l */
-#define SNMP_OCTETSTR 2 /* c */
-#define SNMP_DISPLAYSTR 2 /* c */
-#define SNMP_OBJECTID 3 /* ul */
-#define SNMP_IPADDR 4 /* uc */
-#define SNMP_COUNTER 5 /* ul */
-#define SNMP_GAUGE 6 /* ul */
-#define SNMP_TIMETICKS 7 /* ul */
-#define SNMP_OPAQUE 8 /* c */
-
-/* Additional SNMPv2 Types */
-#define SNMP_UINTEGER 5 /* ul */
-#define SNMP_BITSTR 9 /* uc */
-#define SNMP_NSAP 10 /* uc */
-#define SNMP_COUNTER64 11 /* ul */
-#define SNMP_NOSUCHOBJECT 12
-#define SNMP_NOSUCHINSTANCE 13
-#define SNMP_ENDOFMIBVIEW 14
-
-union snmp_syntax
-{
- unsigned char uc[0]; /* 8 bit unsigned */
- char c[0]; /* 8 bit signed */
- unsigned long ul[0]; /* 32 bit unsigned */
- long l[0]; /* 32 bit signed */
-};
-
-struct snmp_object
-{
- unsigned long *id;
- unsigned int id_len;
- unsigned short type;
- unsigned int syntax_len;
- union snmp_syntax syntax;
-};
-
-struct snmp_request
-{
- unsigned long id;
- unsigned int error_status;
- unsigned int error_index;
-};
-
-struct snmp_v1_trap
-{
- unsigned long *id;
- unsigned int id_len;
- unsigned long ip_address; /* pointer */
- unsigned int general;
- unsigned int specific;
- unsigned long time;
-};
-
-/* SNMP types */
-#define SNMP_IPA 0
-#define SNMP_CNT 1
-#define SNMP_GGE 2
-#define SNMP_TIT 3
-#define SNMP_OPQ 4
-#define SNMP_C64 6
-
-/* SNMP errors */
-#define SERR_NSO 0
-#define SERR_NSI 1
-#define SERR_EOM 2
-
-static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- __sum16 *check);
-struct snmp_cnv
-{
- unsigned int class;
- unsigned int tag;
- int syntax;
-};
-
-static struct snmp_cnv snmp_conv [] =
-{
- {ASN1_UNI, ASN1_NUL, SNMP_NULL},
- {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
- {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
- {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
- {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
- {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
- {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
- {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
- {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
- {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
-
- /* SNMPv2 data types and errors */
- {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
- {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
- {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
- {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
- {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
- {0, 0, -1}
-};
-
-static unsigned char snmp_tag_cls2syntax(unsigned int tag,
- unsigned int cls,
- unsigned short *syntax)
-{
- struct snmp_cnv *cnv;
-
- cnv = snmp_conv;
-
- while (cnv->syntax != -1) {
- if (cnv->tag == tag && cnv->class == cls) {
- *syntax = cnv->syntax;
- return 1;
- }
- cnv++;
- }
- return 0;
-}
-
-static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
- struct snmp_object **obj)
-{
- unsigned int cls, con, tag, len, idlen;
- unsigned short type;
- unsigned char *eoc, *end, *p;
- unsigned long *lp, *id;
- unsigned long ul;
- long l;
-
- *obj = NULL;
- id = NULL;
-
- if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
- return 0;
-
- if (!asn1_oid_decode(ctx, end, &id, &idlen))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
- kfree(id);
- return 0;
- }
-
- if (con != ASN1_PRI) {
- kfree(id);
- return 0;
- }
-
- type = 0;
- if (!snmp_tag_cls2syntax(tag, cls, &type)) {
- kfree(id);
- return 0;
- }
-
- l = 0;
- switch (type) {
- case SNMP_INTEGER:
- len = sizeof(long);
- if (!asn1_long_decode(ctx, end, &l)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len,
- GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- (*obj)->syntax.l[0] = l;
- break;
- case SNMP_OCTETSTR:
- case SNMP_OPAQUE:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len,
- GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.c, p, len);
- kfree(p);
- break;
- case SNMP_NULL:
- case SNMP_NOSUCHOBJECT:
- case SNMP_NOSUCHINSTANCE:
- case SNMP_ENDOFMIBVIEW:
- len = 0;
- *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- if (!asn1_null_decode(ctx, end)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- break;
- case SNMP_OBJECTID:
- if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
- kfree(id);
- return 0;
- }
- len *= sizeof(unsigned long);
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(lp);
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.ul, lp, len);
- kfree(lp);
- break;
- case SNMP_IPADDR:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- if (len != 4) {
- kfree(p);
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(p);
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.uc, p, len);
- kfree(p);
- break;
- case SNMP_COUNTER:
- case SNMP_GAUGE:
- case SNMP_TIMETICKS:
- len = sizeof(unsigned long);
- if (!asn1_ulong_decode(ctx, end, &ul)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- (*obj)->syntax.ul[0] = ul;
- break;
- default:
- kfree(id);
- return 0;
- }
-
- (*obj)->syntax_len = len;
- (*obj)->type = type;
- (*obj)->id = id;
- (*obj)->id_len = idlen;
-
- if (!asn1_eoc_decode(ctx, eoc)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- return 1;
-}
-
-static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
- struct snmp_request *request)
-{
- unsigned int cls, con, tag;
- unsigned char *end;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_ulong_decode(ctx, end, &request->id))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_uint_decode(ctx, end, &request->error_status))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_uint_decode(ctx, end, &request->error_index))
- return 0;
-
- return 1;
-}
-
-/*
- * Fast checksum update for possibly oddly-aligned UDP byte, from the
- * code example in the draft.
- */
-static void fast_csum(__sum16 *csum,
- const unsigned char *optr,
- const unsigned char *nptr,
- int offset)
-{
- unsigned char s[4];
-
- if (offset & 1) {
- s[0] = s[2] = 0;
- s[1] = ~*optr;
- s[3] = *nptr;
- } else {
- s[1] = s[3] = 0;
- s[0] = ~*optr;
- s[2] = *nptr;
- }
-
- *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
-}
-
-/*
- * Mangle IP address.
- * - begin points to the start of the snmp messgae
- * - addr points to the start of the address
- */
-static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- __sum16 *check)
-{
- if (map->from == NOCT1(addr)) {
- u_int32_t old;
-
- if (debug)
- memcpy(&old, (unsigned char *)addr, sizeof(old));
-
- *addr = map->to;
-
- /* Update UDP checksum if being used */
- if (*check) {
- fast_csum(check,
- &map->from, &map->to, addr - begin);
- }
-
- if (debug)
- printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
- "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
- }
-}
-
-static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
- struct snmp_v1_trap *trap,
- const struct oct1_map *map,
- __sum16 *check)
-{
- unsigned int cls, con, tag, len;
- unsigned char *end;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
- return 0;
-
- if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_id_free;
-
- if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
- (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
- goto err_id_free;
-
- if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
- goto err_id_free;
-
- /* IPv4 only */
- if (len != 4)
- goto err_addr_free;
-
- mangle_address(ctx->begin, ctx->pointer - 4, map, check);
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- goto err_addr_free;
-
- if (!asn1_uint_decode(ctx, end, &trap->general))
- goto err_addr_free;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- goto err_addr_free;
-
- if (!asn1_uint_decode(ctx, end, &trap->specific))
- goto err_addr_free;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
- (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
- goto err_addr_free;
-
- if (!asn1_ulong_decode(ctx, end, &trap->time))
- goto err_addr_free;
-
- return 1;
-
-err_addr_free:
- kfree((unsigned long *)trap->ip_address);
-
-err_id_free:
- kfree(trap->id);
-
- return 0;
-}
-
-/*****************************************************************************
- *
- * Misc. routines
- *
- *****************************************************************************/
-
-static void hex_dump(unsigned char *buf, size_t len)
-{
- size_t i;
-
- for (i = 0; i < len; i++) {
- if (i && !(i % 16))
- printk("\n");
- printk("%02x ", *(buf + i));
- }
- printk("\n");
-}
-
-/*
- * Parse and mangle SNMP message according to mapping.
- * (And this is the fucking 'basic' method).
- */
-static int snmp_parse_mangle(unsigned char *msg,
- u_int16_t len,
- const struct oct1_map *map,
- __sum16 *check)
-{
- unsigned char *eoc, *end;
- unsigned int cls, con, tag, vers, pdutype;
- struct asn1_ctx ctx;
- struct asn1_octstr comm;
- struct snmp_object **obj;
-
- if (debug > 1)
- hex_dump(msg, len);
-
- asn1_open(&ctx, msg, len);
-
- /*
- * Start of SNMP message.
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- /*
- * Version 1 or 2 handled.
- */
- if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
- if (!asn1_uint_decode (&ctx, end, &vers))
- return 0;
- if (debug > 1)
- printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
- if (vers > 1)
- return 1;
-
- /*
- * Community.
- */
- if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
- return 0;
- if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
- return 0;
- if (debug > 1) {
- unsigned int i;
-
- printk(KERN_DEBUG "bsalg: community: ");
- for (i = 0; i < comm.len; i++)
- printk("%c", comm.data[i]);
- printk("\n");
- }
- kfree(comm.data);
-
- /*
- * PDU type
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
- return 0;
- if (cls != ASN1_CTX || con != ASN1_CON)
- return 0;
- if (debug > 1) {
- unsigned char *pdus[] = {
- [SNMP_PDU_GET] = "get",
- [SNMP_PDU_NEXT] = "get-next",
- [SNMP_PDU_RESPONSE] = "response",
- [SNMP_PDU_SET] = "set",
- [SNMP_PDU_TRAP1] = "trapv1",
- [SNMP_PDU_BULK] = "bulk",
- [SNMP_PDU_INFORM] = "inform",
- [SNMP_PDU_TRAP2] = "trapv2"
- };
-
- if (pdutype > SNMP_PDU_TRAP2)
- printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
- else
- printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
- }
- if (pdutype != SNMP_PDU_RESPONSE &&
- pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
- return 1;
-
- /*
- * Request header or v1 trap
- */
- if (pdutype == SNMP_PDU_TRAP1) {
- struct snmp_v1_trap trap;
- unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
-
- if (ret) {
- kfree(trap.id);
- kfree((unsigned long *)trap.ip_address);
- } else
- return ret;
-
- } else {
- struct snmp_request req;
-
- if (!snmp_request_decode(&ctx, &req))
- return 0;
-
- if (debug > 1)
- printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
- "error_index=%u\n", req.id, req.error_status,
- req.error_index);
- }
-
- /*
- * Loop through objects, look for IP addresses to mangle.
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (obj == NULL) {
- if (net_ratelimit())
- printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
- return 0;
- }
-
- while (!asn1_eoc_decode(&ctx, eoc)) {
- unsigned int i;
-
- if (!snmp_object_decode(&ctx, obj)) {
- if (*obj) {
- kfree((*obj)->id);
- kfree(*obj);
- }
- kfree(obj);
- return 0;
- }
-
- if (debug > 1) {
- printk(KERN_DEBUG "bsalg: object: ");
- for (i = 0; i < (*obj)->id_len; i++) {
- if (i > 0)
- printk(".");
- printk("%lu", (*obj)->id[i]);
- }
- printk(": type=%u\n", (*obj)->type);
-
- }
-
- if ((*obj)->type == SNMP_IPADDR)
- mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
-
- kfree((*obj)->id);
- kfree(*obj);
- }
- kfree(obj);
-
- if (!asn1_eoc_decode(&ctx, eoc))
- return 0;
-
- return 1;
-}
-
-/*****************************************************************************
- *
- * NAT routines.
- *
- *****************************************************************************/
-
-/*
- * SNMP translation routine.
- */
-static int snmp_translate(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct sk_buff **pskb)
-{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
- u_int16_t udplen = ntohs(udph->len);
- u_int16_t paylen = udplen - sizeof(struct udphdr);
- int dir = CTINFO2DIR(ctinfo);
- struct oct1_map map;
-
- /*
- * Determine mappping for application layer addresses based
- * on NAT manipulations for the packet.
- */
- if (dir == IP_CT_DIR_ORIGINAL) {
- /* SNAT traps */
- map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
- map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
- } else {
- /* DNAT replies */
- map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
- }
-
- if (map.from == map.to)
- return NF_ACCEPT;
-
- if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
- paylen, &map, &udph->check)) {
- if (net_ratelimit())
- printk(KERN_WARNING "bsalg: parser failed\n");
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-/* We don't actually set up expectations, just adjust internal IP
- * addresses if this is being NATted */
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- int dir = CTINFO2DIR(ctinfo);
- unsigned int ret;
- struct iphdr *iph = (*pskb)->nh.iph;
- struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
-
- /* SNMP replies and originating SNMP traps get mangled */
- if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
- return NF_ACCEPT;
- if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- /* No NAT? */
- if (!(ct->status & IPS_NAT_MASK))
- return NF_ACCEPT;
-
- /*
- * Make sure the packet length is ok. So far, we were only guaranteed
- * to have a valid length IP header plus 8 bytes, which means we have
- * enough room for a UDP header. Just verify the UDP length field so we
- * can mess around with the payload.
- */
- if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
- if (net_ratelimit())
- printk(KERN_WARNING "SNMP: dropping malformed packet "
- "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
- NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
- return NF_DROP;
- }
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return NF_DROP;
-
- spin_lock_bh(&snmp_lock);
- ret = snmp_translate(ct, ctinfo, pskb);
- spin_unlock_bh(&snmp_lock);
- return ret;
-}
-
-static struct ip_conntrack_helper snmp_helper = {
- .max_expected = 0,
- .timeout = 180,
- .me = THIS_MODULE,
- .help = help,
- .name = "snmp",
-
- .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_PORT)}}},
- .dst = {.protonum = IPPROTO_UDP},
- },
- .mask = {.src = {.u = {0xFFFF}},
- .dst = {.protonum = 0xFF},
- },
-};
-
-static struct ip_conntrack_helper snmp_trap_helper = {
- .max_expected = 0,
- .timeout = 180,
- .me = THIS_MODULE,
- .help = help,
- .name = "snmp_trap",
-
- .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_TRAP_PORT)}}},
- .dst = {.protonum = IPPROTO_UDP},
- },
- .mask = {.src = {.u = {0xFFFF}},
- .dst = {.protonum = 0xFF},
- },
-};
-
-/*****************************************************************************
- *
- * Module stuff.
- *
- *****************************************************************************/
-
-static int __init ip_nat_snmp_basic_init(void)
-{
- int ret = 0;
-
- ret = ip_conntrack_helper_register(&snmp_helper);
- if (ret < 0)
- return ret;
- ret = ip_conntrack_helper_register(&snmp_trap_helper);
- if (ret < 0) {
- ip_conntrack_helper_unregister(&snmp_helper);
- return ret;
- }
- return ret;
-}
-
-static void __exit ip_nat_snmp_basic_fini(void)
-{
- ip_conntrack_helper_unregister(&snmp_helper);
- ip_conntrack_helper_unregister(&snmp_trap_helper);
-}
-
-module_init(ip_nat_snmp_basic_init);
-module_exit(ip_nat_snmp_basic_fini);
-
-module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
deleted file mode 100644
index adf25f9f70e1..000000000000
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/* This file contains all the functions required for the standalone
- ip_nat module.
-
- These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * - new API and handling of conntrack/nat helpers
- * - now capable of multiple expectations for one master
- * */
-
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/spinlock.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#ifdef CONFIG_XFRM
-static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
-{
- struct ip_conntrack *ct;
- struct ip_conntrack_tuple *t;
- enum ip_conntrack_info ctinfo;
- enum ip_conntrack_dir dir;
- unsigned long statusbit;
-
- ct = ip_conntrack_get(skb, &ctinfo);
- if (ct == NULL)
- return;
- dir = CTINFO2DIR(ctinfo);
- t = &ct->tuplehash[dir].tuple;
-
- if (dir == IP_CT_DIR_ORIGINAL)
- statusbit = IPS_DST_NAT;
- else
- statusbit = IPS_SRC_NAT;
-
- if (ct->status & statusbit) {
- fl->fl4_dst = t->dst.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP)
- fl->fl_ip_dport = t->dst.u.tcp.port;
- }
-
- statusbit ^= IPS_NAT_MASK;
-
- if (ct->status & statusbit) {
- fl->fl4_src = t->src.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP)
- fl->fl_ip_sport = t->src.u.tcp.port;
- }
-}
-#endif
-
-static unsigned int
-ip_nat_fn(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- struct ip_nat_info *info;
- /* maniptype == SRC for postrouting. */
- enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
- /* We never see fragments: conntrack defrags on pre-routing
- and local-out, and ip_nat_out protects post-routing. */
- IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
- & htons(IP_MF|IP_OFFSET)));
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
- /* Can't track? It's not due to stress, or conntrack would
- have dropped it. Hence it's the user's responsibilty to
- packet filter it out, or implement conntrack/NAT for that
- protocol. 8) --RR */
- if (!ct) {
- /* Exception: ICMP redirect to new connection (not in
- hash table yet). We must not let this through, in
- case we're doing NAT to the same network. */
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- struct icmphdr _hdr, *hp;
-
- hp = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4,
- sizeof(_hdr), &_hdr);
- if (hp != NULL &&
- hp->type == ICMP_REDIRECT)
- return NF_DROP;
- }
- return NF_ACCEPT;
- }
-
- /* Don't try to NAT if this packet is not conntracked */
- if (ct == &ip_conntrack_untracked)
- return NF_ACCEPT;
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED+IP_CT_IS_REPLY:
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- if (!ip_nat_icmp_reply_translation(ct, ctinfo,
- hooknum, pskb))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
- case IP_CT_NEW:
- info = &ct->nat.info;
-
- /* Seen it before? This can happen for loopback, retrans,
- or local packets.. */
- if (!ip_nat_initialized(ct, maniptype)) {
- unsigned int ret;
-
- if (unlikely(is_confirmed(ct)))
- /* NAT module was loaded late */
- ret = alloc_null_binding_confirmed(ct, info,
- hooknum);
- else if (hooknum == NF_IP_LOCAL_IN)
- /* LOCAL_IN hook doesn't have a chain! */
- ret = alloc_null_binding(ct, info, hooknum);
- else
- ret = ip_nat_rule_find(pskb, hooknum,
- in, out, ct,
- info);
-
- if (ret != NF_ACCEPT) {
- return ret;
- }
- } else
- DEBUGP("Already setup manip %s for ct %p\n",
- maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
- ct);
- break;
-
- default:
- /* ESTABLISHED */
- IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
- || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
- info = &ct->nat.info;
- }
-
- IP_NF_ASSERT(info);
- return ip_nat_packet(ct, ctinfo, hooknum, pskb);
-}
-
-static unsigned int
-ip_nat_in(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- unsigned int ret;
- __be32 daddr = (*pskb)->nh.iph->daddr;
-
- ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN
- && daddr != (*pskb)->nh.iph->daddr) {
- dst_release((*pskb)->dst);
- (*pskb)->dst = NULL;
- }
- return ret;
-}
-
-static unsigned int
-ip_nat_out(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
-#ifdef CONFIG_XFRM
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-#endif
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN
- && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip
- || ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all
- )
- return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
- }
-#endif
- return ret;
-}
-
-static unsigned int
-ip_nat_local_fn(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN
- && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.dst.ip !=
- ct->tuplehash[!dir].tuple.src.ip
-#ifdef CONFIG_XFRM
- || ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all
-#endif
- )
- if (ip_route_me_harder(pskb, RTN_UNSPEC))
- ret = NF_DROP;
- }
- return ret;
-}
-
-static unsigned int
-ip_nat_adjust(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
- if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
- DEBUGP("ip_nat_standalone: adjusting sequence number\n");
- if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-/* We must be after connection tracking and before packet filtering. */
-
-static struct nf_hook_ops ip_nat_ops[] = {
- /* Before packet filtering, change destination */
- {
- .hook = ip_nat_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_NAT_DST,
- },
- /* After packet filtering, change source */
- {
- .hook = ip_nat_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SRC,
- },
- /* After conntrack, adjust sequence number */
- {
- .hook = ip_nat_adjust,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
- },
- /* Before packet filtering, change destination */
- {
- .hook = ip_nat_local_fn,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_NAT_DST,
- },
- /* After packet filtering, change source */
- {
- .hook = ip_nat_fn,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_NAT_SRC,
- },
- /* After conntrack, adjust sequence number */
- {
- .hook = ip_nat_adjust,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
- },
-};
-
-static int __init ip_nat_standalone_init(void)
-{
- int ret = 0;
-
- need_conntrack();
-
-#ifdef CONFIG_XFRM
- BUG_ON(ip_nat_decode_session != NULL);
- ip_nat_decode_session = nat_decode_session;
-#endif
- ret = ip_nat_rule_init();
- if (ret < 0) {
- printk("ip_nat_init: can't setup rules.\n");
- goto cleanup_decode_session;
- }
- ret = nf_register_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
- if (ret < 0) {
- printk("ip_nat_init: can't register hooks.\n");
- goto cleanup_rule_init;
- }
- return ret;
-
- cleanup_rule_init:
- ip_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
- ip_nat_decode_session = NULL;
- synchronize_net();
-#endif
- return ret;
-}
-
-static void __exit ip_nat_standalone_fini(void)
-{
- nf_unregister_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
- ip_nat_rule_cleanup();
-#ifdef CONFIG_XFRM
- ip_nat_decode_session = NULL;
- synchronize_net();
-#endif
-}
-
-module_init(ip_nat_standalone_init);
-module_exit(ip_nat_standalone_fini);
-
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
deleted file mode 100644
index 604793536fc1..000000000000
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * - Port to newnat API
- *
- * This module currently supports DNAT:
- * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y
- *
- * and SNAT:
- * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x }
- *
- * It has not been tested with
- * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external ip
- * If you do test this please let me know if it works or not.
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack *ct = exp->master;
-
- exp->saved_proto.udp.port
- = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
- exp->dir = IP_CT_DIR_REPLY;
- exp->expectfn = ip_nat_follow_master;
- if (ip_conntrack_expect_related(exp) != 0)
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-static void __exit ip_nat_tftp_fini(void)
-{
- rcu_assign_pointer(ip_nat_tftp_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init ip_nat_tftp_init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_tftp_hook));
- rcu_assign_pointer(ip_nat_tftp_hook, help);
- return 0;
-}
-
-module_init(ip_nat_tftp_init);
-module_exit(ip_nat_tftp_fini);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index a14798a850d7..702d94db19b9 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -8,18 +8,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 2000-03-27: Simplified code (thanks to Andi Kleen for clues).
- * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report).
- * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian
- * Zander).
- * 2000-08-01: Added Nick Williams' MAC support.
- * 2002-06-25: Code cleanup.
- * 2005-01-10: Added /proc counter for dropped packets; fixed so
- * packets aren't delivered to user space if they're going
- * to be dropped.
- * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
- *
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -191,12 +179,13 @@ ipq_flush(int verdict)
static struct sk_buff *
ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
{
- unsigned char *old_tail;
+ sk_buff_data_t old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
+ struct timeval tv;
read_lock_bh(&queue_lock);
@@ -234,15 +223,16 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
if (!skb)
goto nlmsg_failure;
- old_tail= skb->tail;
+ old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
- pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
- pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
+ tv = ktime_to_timeval(entry->skb->tstamp);
+ pmsg->timestamp_sec = tv.tv_sec;
+ pmsg->timestamp_usec = tv.tv_usec;
pmsg->mark = entry->skb->mark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
@@ -378,7 +368,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
}
if (!skb_make_writable(&e->skb, v->data_len))
return -ENOMEM;
- memcpy(e->skb->data, v->payload, v->data_len);
+ skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
return 0;
@@ -495,7 +485,7 @@ ipq_rcv_skb(struct sk_buff *skb)
if (skblen < sizeof(*nlh))
return;
- nlh = (struct nlmsghdr *)skb->data;
+ nlh = nlmsg_hdr(skb);
nlmsglen = nlh->nlmsg_len;
if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
return;
@@ -678,7 +668,7 @@ static int __init ip_queue_init(void)
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 50cc4b92e284..e3f83bf160d9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -7,12 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * - increase module usage count as soon as we have rules inside
- * a table
- * 08 Oct 2005 Harald Welte <lafore@netfilter.org>
- * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
*/
#include <linux/cache.h>
#include <linux/capability.h>
@@ -198,7 +192,7 @@ int do_match(struct ipt_entry_match *m,
{
/* Stop iteration if it doesn't match */
if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
- offset, skb->nh.iph->ihl*4, hotdrop))
+ offset, ip_hdrlen(skb), hotdrop))
return 1;
else
return 0;
@@ -231,7 +225,7 @@ ipt_do_table(struct sk_buff **pskb,
struct xt_table_info *private;
/* Initialization */
- ip = (*pskb)->nh.iph;
+ ip = ip_hdr(*pskb);
datalen = (*pskb)->len - ip->ihl * 4;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
@@ -320,7 +314,7 @@ ipt_do_table(struct sk_buff **pskb,
= 0x57acc001;
#endif
/* Target might have changed stuff. */
- ip = (*pskb)->nh.iph;
+ ip = ip_hdr(*pskb);
datalen = (*pskb)->len - ip->ihl * 4;
if (verdict == IPT_CONTINUE)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index e965b333c997..40e273421398 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -21,15 +21,12 @@
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-
-#include <net/checksum.h>
-
#include <linux/netfilter_arp.h>
-
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/checksum.h>
#define CLUSTERIP_VERSION "0.8"
@@ -240,7 +237,7 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
static inline u_int32_t
clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
{
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
unsigned long hashval;
u_int16_t sport, dport;
u_int16_t *ports;
@@ -310,15 +307,16 @@ target(struct sk_buff **pskb,
const void *targinfo)
{
const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- u_int32_t *mark, hash;
+ u_int32_t hash;
/* don't need to clusterip_config_get() here, since refcount
* is only decremented by destroy() - and ip_tables guarantees
* that the ->target() function isn't called after ->destroy() */
- mark = nf_ct_get_mark((*pskb), &ctinfo);
- if (mark == NULL) {
+ ct = nf_ct_get(*pskb, &ctinfo);
+ if (ct == NULL) {
printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
/* FIXME: need to drop invalid ones, since replies
* to outgoing connections of other nodes will be
@@ -328,7 +326,7 @@ target(struct sk_buff **pskb,
/* special case: ICMP error handling. conntrack distinguishes between
* error messages (RELATED) and information requests (see below) */
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
+ if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP
&& (ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
return XT_CONTINUE;
@@ -341,7 +339,7 @@ target(struct sk_buff **pskb,
switch (ctinfo) {
case IP_CT_NEW:
- *mark = hash;
+ ct->mark = hash;
break;
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -358,7 +356,7 @@ target(struct sk_buff **pskb,
#ifdef DEBUG_CLUSTERP
DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
#endif
- DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
+ DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
if (!clusterip_responsible(cipinfo->config, hash)) {
DEBUGP("not responsible\n");
return NF_DROP;
@@ -411,12 +409,10 @@ checkentry(const char *tablename,
"has invalid config pointer!\n");
return 0;
}
- clusterip_config_entry_get(cipinfo->config);
} else {
/* Case B: This is a new rule referring to an existing
* clusterip config. */
cipinfo->config = config;
- clusterip_config_entry_get(cipinfo->config);
}
} else {
/* Case C: This is a completely new clusterip config */
@@ -523,7 +519,7 @@ arp_mangle(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct arphdr *arp = (*pskb)->nh.arph;
+ struct arphdr *arp = arp_hdr(*pskb);
struct arp_payload *payload;
struct clusterip_config *c;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4f565633631d..918ca92e534a 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -5,14 +5,13 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp
*/
#include <linux/in.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
+#include <net/ip.h>
#include <linux/tcp.h>
#include <net/checksum.h>
@@ -29,13 +28,13 @@ MODULE_DESCRIPTION("iptables ECN modification module");
static inline int
set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
{
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
__u8 oldtos;
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return 0;
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
oldtos = iph->tos;
iph->tos &= ~IPT_ECN_IP_MASK;
iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
@@ -52,7 +51,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
__be16 oldval;
/* Not enought header? */
- tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+ tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
sizeof(_tcph), &_tcph);
if (!tcph)
return 0;
@@ -63,9 +62,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
tcph->cwr == einfo->proto.tcp.cwr)))
return 1;
- if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+ if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
return 0;
- tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
+ tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
oldval = ((__be16 *)tcph)[6];
if (einfo->operation & IPT_ECN_OP_SET_ECE)
@@ -93,7 +92,7 @@ target(struct sk_buff **pskb,
return NF_DROP;
if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
- && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
+ && ip_hdr(*pskb)->protocol == IPPROTO_TCP)
if (!set_ect_tcp(pskb, einfo))
return NF_DROP;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index d9c37fd94228..a42c5cd968b1 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -399,9 +399,9 @@ ipt_log_packet(unsigned int pf,
/* MAC logging for input chain only. */
printk("MAC=");
if (skb->dev && skb->dev->hard_header_len
- && skb->mac.raw != (void*)skb->nh.iph) {
+ && skb->mac_header != skb->network_header) {
int i;
- unsigned char *p = skb->mac.raw;
+ const unsigned char *p = skb_mac_header(skb);
for (i = 0; i < skb->dev->hard_header_len; i++,p++)
printk("%02x%c", *p,
i==skb->dev->hard_header_len - 1
@@ -477,14 +477,10 @@ static int __init ipt_log_init(void)
ret = xt_register_target(&ipt_log_reg);
if (ret < 0)
return ret;
- if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
- printk(KERN_WARNING "ipt_LOG: not logging via system console "
- "since somebody else already registered for PF_INET\n");
- /* we cannot make module load fail here, since otherwise
- * iptables userspace would abort */
- }
-
- return 0;
+ ret = nf_log_register(PF_INET, &ipt_log_logger);
+ if (ret < 0 && ret != -EEXIST)
+ xt_unregister_target(&ipt_log_reg);
+ return ret;
}
static void __exit ipt_log_fini(void)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index b5955f3a3f8f..d4f2d7775330 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -19,12 +19,8 @@
#include <net/ip.h>
#include <net/checksum.h>
#include <net/route.h>
-#include <linux/netfilter_ipv4.h>
-#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
+#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
MODULE_LICENSE("GPL");
@@ -48,7 +44,7 @@ masquerade_check(const char *tablename,
void *targinfo,
unsigned int hook_mask)
{
- const struct ip_nat_multi_range_compat *mr = targinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
DEBUGP("masquerade_check: bad MAP_IPS.\n");
@@ -69,33 +65,26 @@ masquerade_target(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
-#ifdef CONFIG_NF_NAT_NEEDED
+ struct nf_conn *ct;
struct nf_conn_nat *nat;
-#endif
- struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
- struct ip_nat_range newrange;
- const struct ip_nat_multi_range_compat *mr;
+ struct nf_nat_range newrange;
+ const struct nf_nat_multi_range_compat *mr;
struct rtable *rt;
__be32 newsrc;
- IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+ NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
- ct = ip_conntrack_get(*pskb, &ctinfo);
-#ifdef CONFIG_NF_NAT_NEEDED
+ ct = nf_ct_get(*pskb, &ctinfo);
nat = nfct_nat(ct);
-#endif
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
/* Source address is 0.0.0.0 - locally generated packet that is
* probably not supposed to be masqueraded.
*/
-#ifdef CONFIG_NF_NAT_NEEDED
if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
-#else
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0)
-#endif
return NF_ACCEPT;
mr = targinfo;
@@ -107,40 +96,30 @@ masquerade_target(struct sk_buff **pskb,
}
write_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
nat->masq_index = out->ifindex;
-#else
- ct->nat.masq_index = out->ifindex;
-#endif
write_unlock_bh(&masq_lock);
/* Transfer from original range. */
- newrange = ((struct ip_nat_range)
+ newrange = ((struct nf_nat_range)
{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
newsrc, newsrc,
mr->range[0].min, mr->range[0].max });
/* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
+ return nf_nat_setup_info(ct, &newrange, hooknum);
}
static inline int
-device_cmp(struct ip_conntrack *i, void *ifindex)
+device_cmp(struct nf_conn *i, void *ifindex)
{
- int ret;
-#ifdef CONFIG_NF_NAT_NEEDED
struct nf_conn_nat *nat = nfct_nat(i);
+ int ret;
if (!nat)
return 0;
-#endif
read_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
ret = (nat->masq_index == (int)(long)ifindex);
-#else
- ret = (i->nat.masq_index == (int)(long)ifindex);
-#endif
read_unlock_bh(&masq_lock);
return ret;
@@ -156,9 +135,9 @@ static int masq_device_event(struct notifier_block *this,
/* Device was downed. Search entire table for
conntracks which were associated with that device,
and forget them. */
- IP_NF_ASSERT(dev->ifindex != 0);
+ NF_CT_ASSERT(dev->ifindex != 0);
- ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+ nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
}
return NOTIFY_DONE;
@@ -174,9 +153,9 @@ static int masq_inet_event(struct notifier_block *this,
/* IP address was deleted. Search entire table for
conntracks which were associated with that device,
and forget them. */
- IP_NF_ASSERT(dev->ifindex != 0);
+ NF_CT_ASSERT(dev->ifindex != 0);
- ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+ nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
}
return NOTIFY_DONE;
@@ -194,7 +173,7 @@ static struct xt_target masquerade = {
.name = "MASQUERADE",
.family = AF_INET,
.target = masquerade_target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
.table = "nat",
.hooks = 1 << NF_IP_POST_ROUTING,
.checkentry = masquerade_check,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index fd7aaa347cd8..068c69bce30e 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -16,11 +16,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
#define MODULENAME "NETMAP"
MODULE_LICENSE("GPL");
@@ -40,7 +36,7 @@ check(const char *tablename,
void *targinfo,
unsigned int hook_mask)
{
- const struct ip_nat_multi_range_compat *mr = targinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
@@ -61,39 +57,39 @@ target(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct ip_conntrack *ct;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
__be32 new_ip, netmask;
- const struct ip_nat_multi_range_compat *mr = targinfo;
- struct ip_nat_range newrange;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
+ struct nf_nat_range newrange;
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+ NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_POST_ROUTING
|| hooknum == NF_IP_LOCAL_OUT);
- ct = ip_conntrack_get(*pskb, &ctinfo);
+ ct = nf_ct_get(*pskb, &ctinfo);
netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
- new_ip = (*pskb)->nh.iph->daddr & ~netmask;
+ new_ip = ip_hdr(*pskb)->daddr & ~netmask;
else
- new_ip = (*pskb)->nh.iph->saddr & ~netmask;
+ new_ip = ip_hdr(*pskb)->saddr & ~netmask;
new_ip |= mr->range[0].min_ip & netmask;
- newrange = ((struct ip_nat_range)
+ newrange = ((struct nf_nat_range)
{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
new_ip, new_ip,
mr->range[0].min, mr->range[0].max });
/* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
+ return nf_nat_setup_info(ct, &newrange, hooknum);
}
static struct xt_target target_module = {
.name = MODULENAME,
.family = AF_INET,
.target = target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
.table = "nat",
.hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) |
(1 << NF_IP_LOCAL_OUT),
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index c2b6b80670f8..68cc76a198eb 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -19,11 +19,7 @@
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -43,7 +39,7 @@ redirect_check(const char *tablename,
void *targinfo,
unsigned int hook_mask)
{
- const struct ip_nat_multi_range_compat *mr = targinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
DEBUGP("redirect_check: bad MAP_IPS.\n");
@@ -64,17 +60,17 @@ redirect_target(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct ip_conntrack *ct;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
__be32 newdst;
- const struct ip_nat_multi_range_compat *mr = targinfo;
- struct ip_nat_range newrange;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
+ struct nf_nat_range newrange;
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+ NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_LOCAL_OUT);
- ct = ip_conntrack_get(*pskb, &ctinfo);
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ ct = nf_ct_get(*pskb, &ctinfo);
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
/* Local packets: make them go to loopback */
if (hooknum == NF_IP_LOCAL_OUT)
@@ -96,20 +92,20 @@ redirect_target(struct sk_buff **pskb,
}
/* Transfer from original range. */
- newrange = ((struct ip_nat_range)
+ newrange = ((struct nf_nat_range)
{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
newdst, newdst,
mr->range[0].min, mr->range[0].max });
/* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
+ return nf_nat_setup_info(ct, &newrange, hooknum);
}
static struct xt_target redirect_reg = {
.name = "REDIRECT",
.family = AF_INET,
.target = redirect_target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
.table = "nat",
.hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
.checkentry = redirect_check,
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 80f739e21824..9041e0741f6f 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -1,7 +1,5 @@
/*
* This is a module which is used for rejecting packets.
- * Added support for customized reject packets (Jozsef Kadlecsik).
- * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
*/
/* (C) 1999-2001 Paul `Rusty' Russell
@@ -43,7 +41,7 @@ MODULE_DESCRIPTION("iptables REJECT target module");
static void send_reset(struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
- struct iphdr *iph = oldskb->nh.iph;
+ struct iphdr *niph;
struct tcphdr _otcph, *oth, *tcph;
__be16 tmp_port;
__be32 tmp_addr;
@@ -51,10 +49,10 @@ static void send_reset(struct sk_buff *oldskb, int hook)
unsigned int addr_type;
/* IP header checks: fragment. */
- if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
+ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
return;
- oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
+ oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
sizeof(_otcph), &_otcph);
if (oth == NULL)
return;
@@ -64,7 +62,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
return;
/* Check checksum */
- if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
+ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
return;
/* We need a linear, writeable skb. We also need to expand
@@ -84,20 +82,21 @@ static void send_reset(struct sk_buff *oldskb, int hook)
skb_shinfo(nskb)->gso_segs = 0;
skb_shinfo(nskb)->gso_type = 0;
- tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
+ tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb));
/* Swap source and dest */
- tmp_addr = nskb->nh.iph->saddr;
- nskb->nh.iph->saddr = nskb->nh.iph->daddr;
- nskb->nh.iph->daddr = tmp_addr;
+ niph = ip_hdr(nskb);
+ tmp_addr = niph->saddr;
+ niph->saddr = niph->daddr;
+ niph->daddr = tmp_addr;
tmp_port = tcph->source;
tcph->source = tcph->dest;
tcph->dest = tmp_port;
/* Truncate to length (no data) */
tcph->doff = sizeof(struct tcphdr)/4;
- skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
- nskb->nh.iph->tot_len = htons(nskb->len);
+ skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
+ niph->tot_len = htons(nskb->len);
if (tcph->ack) {
needs_ack = 0;
@@ -105,9 +104,9 @@ static void send_reset(struct sk_buff *oldskb, int hook)
tcph->ack_seq = 0;
} else {
needs_ack = 1;
- tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
- + oldskb->len - oldskb->nh.iph->ihl*4
- - (oth->doff<<2));
+ tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+ oldskb->len - ip_hdrlen(oldskb) -
+ (oth->doff << 2));
tcph->seq = 0;
}
@@ -122,14 +121,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
/* Adjust TCP checksum */
tcph->check = 0;
tcph->check = tcp_v4_check(sizeof(struct tcphdr),
- nskb->nh.iph->saddr,
- nskb->nh.iph->daddr,
+ niph->saddr, niph->daddr,
csum_partial((char *)tcph,
sizeof(struct tcphdr), 0));
/* Set DF, id = 0 */
- nskb->nh.iph->frag_off = htons(IP_DF);
- nskb->nh.iph->id = 0;
+ niph->frag_off = htons(IP_DF);
+ niph->id = 0;
addr_type = RTN_UNSPEC;
if (hook != NF_IP_FORWARD
@@ -145,12 +143,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
nskb->ip_summed = CHECKSUM_NONE;
/* Adjust IP TTL */
- nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
+ niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
/* Adjust IP checksum */
- nskb->nh.iph->check = 0;
- nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph,
- nskb->nh.iph->ihl);
+ niph->check = 0;
+ niph->check = ip_fast_csum(skb_network_header(nskb), niph->ihl);
/* "Never happens" */
if (nskb->len > dst_mtu(nskb->dst))
@@ -182,7 +179,7 @@ static unsigned int reject(struct sk_buff **pskb,
/* Our naive response construction doesn't deal with IP
options, and probably shouldn't try. */
- if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
+ if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
return NF_DROP;
/* WARNING: This code causes reentry within iptables.
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index bd4404e5c688..511e5ff84938 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -7,21 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 010320 Martin Josefsson <gandalf@wlug.westbo.se>
- * * copied ipt_BALANCE.c to ipt_SAME.c and changed a few things.
- * 010728 Martin Josefsson <gandalf@wlug.westbo.se>
- * * added --nodst to not include destination-ip in new source
- * calculations.
- * * added some more sanity-checks.
- * 010729 Martin Josefsson <gandalf@wlug.westbo.se>
- * * fixed a buggy if-statement in same_check(), should have
- * used ntohl() but didn't.
- * * added support for multiple ranges. IPT_SAME_MAX_RANGE is
- * defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h
- * and is currently set to 10.
- * * added support for 1-address range, nice to have now that
- * we have multiple ranges.
*/
#include <linux/types.h>
#include <linux/ip.h>
@@ -35,11 +20,7 @@
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
#include <linux/netfilter_ipv4/ipt_SAME.h>
MODULE_LICENSE("GPL");
@@ -138,17 +119,17 @@ same_target(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct ip_conntrack *ct;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
u_int32_t tmpip, aindex;
__be32 new_ip;
const struct ipt_same_info *same = targinfo;
- struct ip_nat_range newrange;
- const struct ip_conntrack_tuple *t;
+ struct nf_nat_range newrange;
+ const struct nf_conntrack_tuple *t;
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
+ NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
hooknum == NF_IP_POST_ROUTING);
- ct = ip_conntrack_get(*pskb, &ctinfo);
+ ct = nf_ct_get(*pskb, &ctinfo);
t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
@@ -157,17 +138,10 @@ same_target(struct sk_buff **pskb,
Here we calculate the index in same->iparray which
holds the ipaddress we should use */
-#ifdef CONFIG_NF_NAT_NEEDED
tmpip = ntohl(t->src.u3.ip);
if (!(same->info & IPT_SAME_NODST))
tmpip += ntohl(t->dst.u3.ip);
-#else
- tmpip = ntohl(t->src.ip);
-
- if (!(same->info & IPT_SAME_NODST))
- tmpip += ntohl(t->dst.ip);
-#endif
aindex = tmpip % same->ipnum;
new_ip = htonl(same->iparray[aindex]);
@@ -178,13 +152,13 @@ same_target(struct sk_buff **pskb,
NIPQUAD(new_ip));
/* Transfer from original range. */
- newrange = ((struct ip_nat_range)
+ newrange = ((struct nf_nat_range)
{ same->range[0].flags, new_ip, new_ip,
/* FIXME: Use ports from correct range! */
same->range[0].min, same->range[0].max });
/* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
+ return nf_nat_setup_info(ct, &newrange, hooknum);
}
static struct xt_target same_reg = {
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index cedf9f7d9d6e..0ad02f249837 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -29,13 +29,13 @@ target(struct sk_buff **pskb,
const void *targinfo)
{
const struct ipt_tos_target_info *tosinfo = targinfo;
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
__u8 oldtos;
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return NF_DROP;
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
oldtos = iph->tos;
iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 64be31c22ba9..a991ec7bd4e7 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -32,7 +32,7 @@ ipt_ttl_target(struct sk_buff **pskb,
if (!skb_make_writable(pskb, (*pskb)->len))
return NF_DROP;
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
switch (info->mode) {
case IPT_TTL_SET:
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index a26404dbe212..23b607b33b32 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -2,20 +2,6 @@
* netfilter module for userspace packet logging daemons
*
* (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
- *
- * 2000/09/22 ulog-cprange feature added
- * 2001/01/04 in-kernel queue as proposed by Sebastian Zander
- * <zander@fokus.gmd.de>
- * 2001/01/30 per-rule nlgroup conflicts with global queue.
- * nlgroup now global (sysctl)
- * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at
- * module loadtime -HW
- * 2002/07/07 remove broken nflog_rcv() function -HW
- * 2002/08/29 fix shifted/unshifted nlgroup bug -HW
- * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen>
- * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT
- * resulting in bogus 'error during NLMSG_PUT' messages.
- *
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
@@ -42,8 +28,6 @@
* flushtimeout:
* Specify, after how many hundredths of a second the queue should be
* flushed even if it is not full yet.
- *
- * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
*/
#include <linux/module.h>
@@ -61,6 +45,7 @@
#include <linux/netfilter_ipv4/ipt_ULOG.h>
#include <net/sock.h>
#include <linux/bitops.h>
+#include <asm/unaligned.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
@@ -186,6 +171,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
ulog_packet_msg_t *pm;
size_t size, copy_len;
struct nlmsghdr *nlh;
+ struct timeval tv;
/* ffs == find first bit set, necessary because userspace
* is already shifting groupnumber, but we need unshifted.
@@ -231,14 +217,15 @@ static void ipt_ulog_packet(unsigned int hooknum,
pm = NLMSG_DATA(nlh);
/* We might not have a timestamp, get one */
- if (skb->tstamp.off_sec == 0)
+ if (skb->tstamp.tv64 == 0)
__net_timestamp((struct sk_buff *)skb);
/* copy hook, prefix, timestamp, payload, etc. */
pm->data_len = copy_len;
- pm->timestamp_sec = skb->tstamp.off_sec;
- pm->timestamp_usec = skb->tstamp.off_usec;
- pm->mark = skb->mark;
+ tv = ktime_to_timeval(skb->tstamp);
+ put_unaligned(tv.tv_sec, &pm->timestamp_sec);
+ put_unaligned(tv.tv_usec, &pm->timestamp_usec);
+ put_unaligned(skb->mark, &pm->mark);
pm->hook = hooknum;
if (prefix != NULL)
strncpy(pm->prefix, prefix, sizeof(pm->prefix));
@@ -248,9 +235,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
*(pm->prefix) = '\0';
if (in && in->hard_header_len > 0
- && skb->mac.raw != (void *) skb->nh.iph
+ && skb->mac_header != skb->network_header
&& in->hard_header_len <= ULOG_MAC_LEN) {
- memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
+ memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
pm->mac_len = in->hard_header_len;
} else
pm->mac_len = 0;
@@ -362,12 +349,52 @@ static int ipt_ulog_checkentry(const char *tablename,
return 1;
}
+#ifdef CONFIG_COMPAT
+struct compat_ipt_ulog_info {
+ compat_uint_t nl_group;
+ compat_size_t copy_range;
+ compat_size_t qthreshold;
+ char prefix[ULOG_PREFIX_LEN];
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+ struct compat_ipt_ulog_info *cl = src;
+ struct ipt_ulog_info l = {
+ .nl_group = cl->nl_group,
+ .copy_range = cl->copy_range,
+ .qthreshold = cl->qthreshold,
+ };
+
+ memcpy(l.prefix, cl->prefix, sizeof(l.prefix));
+ memcpy(dst, &l, sizeof(l));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+ struct ipt_ulog_info *l = src;
+ struct compat_ipt_ulog_info cl = {
+ .nl_group = l->nl_group,
+ .copy_range = l->copy_range,
+ .qthreshold = l->qthreshold,
+ };
+
+ memcpy(cl.prefix, l->prefix, sizeof(cl.prefix));
+ return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
static struct xt_target ipt_ulog_reg = {
.name = "ULOG",
.family = AF_INET,
.target = ipt_ulog_target,
.targetsize = sizeof(struct ipt_ulog_info),
.checkentry = ipt_ulog_checkentry,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_ipt_ulog_info),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
+#endif
.me = THIS_MODULE,
};
@@ -389,14 +416,11 @@ static int __init ipt_ulog_init(void)
}
/* initialize ulog_buffers */
- for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- init_timer(&ulog_buffers[i].timer);
- ulog_buffers[i].timer.function = ulog_timer;
- ulog_buffers[i].timer.data = i;
- }
+ for (i = 0; i < ULOG_MAXNLGROUPS; i++)
+ setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (!nflognl)
return -ENOMEM;
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index cfa0472617f6..a652a1451552 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -33,7 +33,7 @@ static int match(const struct sk_buff *skb,
int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_addrtype_info *info = matchinfo;
- const struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
int ret = 1;
if (info->source)
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 37508b2cfea6..26218122f865 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -1,7 +1,5 @@
/* IP tables module for matching the value of the IPv4 and TCP ECN bits
*
- * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp
- *
* (C) 2002 by Harald Welte <laforge@gnumonks.org>
*
* This program is free software; you can redistribute it and/or modify
@@ -11,6 +9,7 @@
#include <linux/in.h>
#include <linux/ip.h>
+#include <net/ip.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>
@@ -26,7 +25,7 @@ MODULE_LICENSE("GPL");
static inline int match_ip(const struct sk_buff *skb,
const struct ipt_ecn_info *einfo)
{
- return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect);
+ return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
}
static inline int match_tcp(const struct sk_buff *skb,
@@ -38,8 +37,7 @@ static inline int match_tcp(const struct sk_buff *skb,
/* In practice, TCP match does this, so can't fail. But let's
* be good citizens.
*/
- th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL) {
*hotdrop = 0;
return 0;
@@ -80,7 +78,7 @@ static int match(const struct sk_buff *skb,
return 0;
if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
- if (skb->nh.iph->protocol != IPPROTO_TCP)
+ if (ip_hdr(skb)->protocol != IPPROTO_TCP)
return 0;
if (!match_tcp(skb, info, hotdrop))
return 0;
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index bc5d5e6091e4..33af9e940887 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -32,7 +32,7 @@ match(const struct sk_buff *skb,
int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_iprange_info *info = matchinfo;
- const struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
if (info->flags & IPRANGE_SRC) {
if (((ntohl(iph->saddr) < ntohl(info->src.min_ip))
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index aecb9c48e152..15a9e8bbb7cc 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -183,11 +183,11 @@ ipt_recent_match(const struct sk_buff *skb,
int ret = info->invert;
if (info->side == IPT_RECENT_DEST)
- addr = skb->nh.iph->daddr;
+ addr = ip_hdr(skb)->daddr;
else
- addr = skb->nh.iph->saddr;
+ addr = ip_hdr(skb)->saddr;
- ttl = skb->nh.iph->ttl;
+ ttl = ip_hdr(skb)->ttl;
/* use TTL as seen before forwarding */
if (out && !skb->sk)
ttl++;
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 5d33b51d49d8..d314844af12b 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -30,7 +30,7 @@ match(const struct sk_buff *skb,
{
const struct ipt_tos_info *info = matchinfo;
- return (skb->nh.iph->tos == info->tos) ^ info->invert;
+ return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
}
static struct xt_match tos_match = {
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 1eca9f400374..ab02d9e3139c 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -1,7 +1,5 @@
/* IP tables module for matching the value of the TTL
*
- * ipt_ttl.c,v 1.5 2000/11/13 11:16:08 laforge Exp
- *
* (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
@@ -26,19 +24,20 @@ static int match(const struct sk_buff *skb,
int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_ttl_info *info = matchinfo;
+ const u8 ttl = ip_hdr(skb)->ttl;
switch (info->mode) {
case IPT_TTL_EQ:
- return (skb->nh.iph->ttl == info->ttl);
+ return (ttl == info->ttl);
break;
case IPT_TTL_NE:
- return (!(skb->nh.iph->ttl == info->ttl));
+ return (!(ttl == info->ttl));
break;
case IPT_TTL_LT:
- return (skb->nh.iph->ttl < info->ttl);
+ return (ttl < info->ttl);
break;
case IPT_TTL_GT:
- return (skb->nh.iph->ttl > info->ttl);
+ return (ttl > info->ttl);
break;
default:
printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index d1d61e97b976..42728909eba0 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -102,7 +103,7 @@ ipt_local_out_hook(unsigned int hook,
{
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 98b66ef0c714..9278802f2742 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -7,8 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
*/
#include <linux/module.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -17,6 +15,7 @@
#include <net/sock.h>
#include <net/route.h>
#include <linux/ip.h>
+#include <net/ip.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -130,13 +129,14 @@ ipt_local_hook(unsigned int hook,
int (*okfn)(struct sk_buff *))
{
unsigned int ret;
+ const struct iphdr *iph;
u_int8_t tos;
__be32 saddr, daddr;
u_int32_t mark;
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
@@ -144,19 +144,23 @@ ipt_local_hook(unsigned int hook,
/* Save things which could affect route */
mark = (*pskb)->mark;
- saddr = (*pskb)->nh.iph->saddr;
- daddr = (*pskb)->nh.iph->daddr;
- tos = (*pskb)->nh.iph->tos;
+ iph = ip_hdr(*pskb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ tos = iph->tos;
ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
/* Reroute for ANY change. */
- if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
- && ((*pskb)->nh.iph->saddr != saddr
- || (*pskb)->nh.iph->daddr != daddr
- || (*pskb)->mark != mark
- || (*pskb)->nh.iph->tos != tos))
- if (ip_route_me_harder(pskb, RTN_UNSPEC))
- ret = NF_DROP;
+ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+ iph = ip_hdr(*pskb);
+
+ if (iph->saddr != saddr ||
+ iph->daddr != daddr ||
+ (*pskb)->mark != mark ||
+ iph->tos != tos)
+ if (ip_route_me_harder(pskb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
return ret;
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8f3e92d20df8..0654eaae70c9 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -4,14 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - move L3 protocol dependent part to this file.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - add get_features() to support various size of conntrack
- * structures.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
*/
#include <linux/types.h>
@@ -87,7 +79,7 @@ nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
local_bh_enable();
if (skb)
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
return skb;
}
@@ -97,16 +89,16 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
u_int8_t *protonum)
{
/* Never happen */
- if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+ if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) {
if (net_ratelimit()) {
printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
- (*pskb)->nh.iph->protocol, hooknum);
+ ip_hdr(*pskb)->protocol, hooknum);
}
return -NF_DROP;
}
- *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
- *protonum = (*pskb)->nh.iph->protocol;
+ *dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb);
+ *protonum = ip_hdr(*pskb)->protocol;
return NF_ACCEPT;
}
@@ -152,9 +144,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
return NF_ACCEPT;
return help->helper->help(pskb,
- (*pskb)->nh.raw - (*pskb)->data
- + (*pskb)->nh.iph->ihl*4,
- ct, ctinfo);
+ skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+ ct, ctinfo);
}
static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
@@ -171,7 +162,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
#endif
/* Gather fragments. */
- if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
*pskb = nf_ct_ipv4_gather_frags(*pskb,
hooknum == NF_IP_PRE_ROUTING ?
IP_DEFRAG_CONNTRACK_IN :
@@ -199,7 +190,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
{
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5fd1e5363c1a..f4fc657c1983 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -4,11 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
*/
#include <linux/types.h>
@@ -158,7 +153,7 @@ icmp_error_message(struct sk_buff *skb,
NF_CT_ASSERT(skb->nfct == NULL);
/* Not enough header? */
- inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+ inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in);
if (inside == NULL)
return -NF_ACCEPT;
@@ -172,7 +167,7 @@ icmp_error_message(struct sk_buff *skb,
/* rcu_read_lock()ed by nf_hook_slow */
innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
- dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
+ dataoff = ip_hdrlen(skb) + sizeof(inside->icmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
inside->ip.protocol, &origtuple,
@@ -227,7 +222,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
struct icmphdr _ih, *icmph;
/* Not enough header? */
- icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 452e9d326684..ea02f00d2dac 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -431,7 +431,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
} *inside;
struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple inner, target;
- int hdrlen = (*pskb)->nh.iph->ihl * 4;
+ int hdrlen = ip_hdrlen(*pskb);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
@@ -439,7 +439,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
return 0;
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
@@ -469,9 +469,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
if (!nf_ct_get_tuple(*pskb,
- (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
- (*pskb)->nh.iph->ihl*4 +
- sizeof(struct icmphdr) + inside->ip.ihl*4,
+ ip_hdrlen(*pskb) + sizeof(struct icmphdr),
+ (ip_hdrlen(*pskb) +
+ sizeof(struct icmphdr) + inside->ip.ihl * 4),
(u_int16_t)AF_INET,
inside->ip.protocol,
&inner, l3proto, l4proto))
@@ -483,14 +483,14 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
packet: PREROUTING (DST manip), routing produces ICMP, goes
through POSTROUTING (which must correct the DST manip). */
if (!manip_pkt(inside->ip.protocol, pskb,
- (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp),
+ ip_hdrlen(*pskb) + sizeof(inside->icmp),
&ct->tuplehash[!dir].tuple,
!manip))
return 0;
if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
/* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
inside->icmp.checksum = 0;
inside->icmp.checksum =
csum_fold(skb_checksum(*pskb, hdrlen,
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9cbf3f9be13b..fcebc968d37f 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -33,7 +33,7 @@ static int set_addr(struct sk_buff **pskb,
unsigned int addroff, __be32 ip, __be16 port)
{
enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = ip_conntrack_get(*pskb, &ctinfo);
+ struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
struct {
__be32 ip;
__be16 port;
@@ -44,7 +44,7 @@ static int set_addr(struct sk_buff **pskb,
buf.port = port;
addroff += dataoff;
- if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
+ if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
@@ -55,11 +55,11 @@ static int set_addr(struct sk_buff **pskb,
}
/* Relocate data pointer */
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
+ th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
sizeof(_tcph), &_tcph);
if (th == NULL)
return -1;
- *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
+ *data = (*pskb)->data + ip_hdrlen(*pskb) +
th->doff * 4 + dataoff;
} else {
if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
@@ -73,8 +73,8 @@ static int set_addr(struct sk_buff **pskb,
/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
* or pull everything in a linear buffer, so we can safely
* use the skb pointers now */
- *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
- sizeof(struct udphdr);
+ *data = ((*pskb)->data + ip_hdrlen(*pskb) +
+ sizeof(struct udphdr));
}
return 0;
@@ -383,7 +383,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
static void ip_nat_q931_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
- struct ip_nat_range range;
+ struct nf_nat_range range;
if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
nf_nat_follow_master(new, this);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 49a90c39ffce..15b6e5ce3a04 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -87,12 +87,13 @@ static void mangle_contents(struct sk_buff *skb,
unsigned char *data;
BUG_ON(skb_is_nonlinear(skb));
- data = (unsigned char *)skb->nh.iph + dataoff;
+ data = skb_network_header(skb) + dataoff;
/* move post-replacement */
memmove(data + match_offset + rep_len,
data + match_offset + match_len,
- skb->tail - (data + match_offset + match_len));
+ skb->tail - (skb->network_header + dataoff +
+ match_offset + match_len));
/* insert data from buffer */
memcpy(data + match_offset, rep_buffer, rep_len);
@@ -111,8 +112,8 @@ static void mangle_contents(struct sk_buff *skb,
}
/* fix IP hdr checksum information */
- skb->nh.iph->tot_len = htons(skb->len);
- ip_send_check(skb->nh.iph);
+ ip_hdr(skb)->tot_len = htons(skb->len);
+ ip_send_check(ip_hdr(skb));
}
/* Unusual, but possible case. */
@@ -152,6 +153,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
const char *rep_buffer,
unsigned int rep_len)
{
+ struct rtable *rt = (struct rtable *)(*pskb)->dst;
struct iphdr *iph;
struct tcphdr *tcph;
int oldlen, datalen;
@@ -166,7 +168,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
SKB_LINEAR_ASSERT(*pskb);
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
tcph = (void *)iph + iph->ihl*4;
oldlen = (*pskb)->len - iph->ihl*4;
@@ -175,11 +177,22 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
datalen = (*pskb)->len - iph->ihl*4;
if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- tcph->check = 0;
- tcph->check = tcp_v4_check(datalen,
- iph->saddr, iph->daddr,
- csum_partial((char *)tcph,
- datalen, 0));
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+ (*pskb)->ip_summed = CHECKSUM_PARTIAL;
+ (*pskb)->csum_start = skb_headroom(*pskb) +
+ skb_network_offset(*pskb) +
+ iph->ihl * 4;
+ (*pskb)->csum_offset = offsetof(struct tcphdr, check);
+ tcph->check = ~tcp_v4_check(datalen,
+ iph->saddr, iph->daddr, 0);
+ } else {
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(datalen,
+ iph->saddr, iph->daddr,
+ csum_partial((char *)tcph,
+ datalen, 0));
+ }
} else
nf_proto_csum_replace2(&tcph->check, *pskb,
htons(oldlen), htons(datalen), 1);
@@ -190,7 +203,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
(int)rep_len - (int)match_len,
ct, ctinfo);
/* Tell TCP window tracking about seq change */
- nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4,
+ nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb),
ct, CTINFO2DIR(ctinfo));
}
return 1;
@@ -216,12 +229,13 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
const char *rep_buffer,
unsigned int rep_len)
{
+ struct rtable *rt = (struct rtable *)(*pskb)->dst;
struct iphdr *iph;
struct udphdr *udph;
int datalen, oldlen;
/* UDP helpers might accidentally mangle the wrong packet */
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
match_offset + match_len)
return 0;
@@ -234,7 +248,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
!enlarge_skb(pskb, rep_len - match_len))
return 0;
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
udph = (void *)iph + iph->ihl*4;
oldlen = (*pskb)->len - iph->ihl*4;
@@ -250,13 +264,25 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
return 1;
if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- udph->check = 0;
- udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
- datalen, IPPROTO_UDP,
- csum_partial((char *)udph,
- datalen, 0));
- if (!udph->check)
- udph->check = CSUM_MANGLED_0;
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+ (*pskb)->ip_summed = CHECKSUM_PARTIAL;
+ (*pskb)->csum_start = skb_headroom(*pskb) +
+ skb_network_offset(*pskb) +
+ iph->ihl * 4;
+ (*pskb)->csum_offset = offsetof(struct udphdr, check);
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, IPPROTO_UDP,
+ 0);
+ } else {
+ udph->check = 0;
+ udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, IPPROTO_UDP,
+ csum_partial((char *)udph,
+ datalen, 0));
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
} else
nf_proto_csum_replace2(&udph->check, *pskb,
htons(oldlen), htons(datalen), 1);
@@ -318,8 +344,8 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
unsigned int dir, optoff, optend;
struct nf_conn_nat *nat = nfct_nat(ct);
- optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
- optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
+ optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
+ optend = ip_hdrlen(*pskb) + tcph->doff * 4;
if (!skb_make_writable(pskb, optend))
return 0;
@@ -371,10 +397,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
this_way = &nat->info.seq[dir];
other_way = &nat->info.seq[!dir];
- if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+ if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
return 0;
- tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
if (after(ntohl(tcph->seq), this_way->correction_pos))
newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
else
@@ -399,7 +425,7 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
return 0;
- nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, ct, dir);
+ nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir);
return 1;
}
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 7ba341c22eaa..a66888749ceb 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -53,7 +53,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
struct nf_conntrack_tuple t;
struct nf_ct_pptp_master *ct_pptp_info;
struct nf_nat_pptp *nat_pptp_info;
- struct ip_nat_range range;
+ struct nf_nat_range range;
ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index e5a34c17d927..c3908bc5a709 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -72,6 +72,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
__be16 *keyptr;
unsigned int min, i, range_size;
+ /* If there is no master conntrack we are not PPTP,
+ do not change tuples */
+ if (!conntrack->master)
+ return 0;
+
if (maniptype == IP_NAT_MANIP_SRC)
keyptr = &tuple->src.u.gre.key;
else
@@ -122,18 +127,9 @@ gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff,
if (maniptype != IP_NAT_MANIP_DST)
return 1;
switch (greh->version) {
- case 0:
- if (!greh->key) {
- DEBUGP("can't nat GRE w/o key\n");
- break;
- }
- if (greh->csum) {
- /* FIXME: Never tested this code... */
- nf_proto_csum_replace4(gre_csum(greh), *pskb,
- *(gre_key(greh)),
- tuple->dst.u.gre.key, 0);
- }
- *(gre_key(greh)) = tuple->dst.u.gre.key;
+ case GRE_VERSION_1701:
+ /* We do not currently NAT any GREv0 packets.
+ * Try to behave like "nf_nat_proto_unknown" */
break;
case GRE_VERSION_PPTP:
DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 147a4370cf03..2534f718ab92 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -191,7 +191,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
if (hooknum == NF_IP_LOCAL_OUT &&
mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
- warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+ warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
mr->range[0].min_ip);
return nf_nat_setup_info(ct, &mr->range[0], hooknum);
@@ -226,10 +226,6 @@ static int ipt_dnat_checkentry(const char *tablename,
printk("DNAT: multiple ranges no longer supported\n");
return 0;
}
- if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) {
- printk("DNAT: port randomization not supported\n");
- return 0;
- }
return 1;
}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b12cd7c314ca..fac97cf51ae5 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
+#include <net/ip.h>
#include <linux/udp.h>
#include <net/netfilter/nf_nat.h>
@@ -92,7 +93,7 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
matchoff, matchlen, addr, addrlen))
return 0;
- *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
return 1;
}
@@ -106,7 +107,7 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
struct addr_map map;
int dataoff, datalen;
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
datalen = (*pskb)->len - dataoff;
if (datalen < sizeof("SIP/2.0") - 1)
return NF_DROP;
@@ -155,7 +156,7 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb,
return 0;
/* We need to reload this. Thanks Patrick. */
- *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
return 1;
}
@@ -168,7 +169,7 @@ static int mangle_content_len(struct sk_buff **pskb,
char buffer[sizeof("65536")];
int bufflen;
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
/* Get actual SDP lenght */
if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
@@ -200,7 +201,7 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
char buffer[sizeof("nnn.nnn.nnn.nnn")];
unsigned int dataoff, bufflen;
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
/* Mangle owner and contact info. */
bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
@@ -221,6 +222,29 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
return mangle_content_len(pskb, ctinfo, ct, dptr);
}
+static void ip_nat_sdp_expect(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ struct nf_nat_range range;
+
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ /* hook doesn't matter, but it has to do source manip */
+ nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.min = range.max = exp->saved_proto;
+ range.min_ip = range.max_ip = exp->saved_ip;
+ /* hook doesn't matter, but it has to do destination manip */
+ nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+}
+
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
static unsigned int ip_nat_sdp(struct sk_buff **pskb,
@@ -238,13 +262,14 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
/* Connection will come from reply */
newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ exp->saved_ip = exp->tuple.dst.u3.ip;
exp->tuple.dst.u3.ip = newip;
exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
exp->dir = !dir;
/* When you see the packet, we need to NAT it the same as the
this one. */
- exp->expectfn = nf_nat_follow_master;
+ exp->expectfn = ip_nat_sdp_expect;
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ce5c4939a6ee..6e88505d6162 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -38,10 +38,6 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: James Morris <jmorris@intercode.com.au>
- *
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -1194,7 +1190,7 @@ static int snmp_translate(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
struct sk_buff **pskb)
{
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
u_int16_t udplen = ntohs(udph->len);
u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1235,7 +1231,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
{
int dir = CTINFO2DIR(ctinfo);
unsigned int ret;
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
/* SNMP replies and originating SNMP traps get mangled */
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index e4d3ef17d45b..64bbed2ba780 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -86,8 +86,7 @@ nf_nat_fn(unsigned int hooknum,
/* We never see fragments: conntrack defrags on pre-routing
and local-out, and nf_nat_out protects post-routing. */
- NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off
- & htons(IP_MF|IP_OFFSET)));
+ NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
ct = nf_ct_get(*pskb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
@@ -98,11 +97,10 @@ nf_nat_fn(unsigned int hooknum,
/* Exception: ICMP redirect to new connection (not in
hash table yet). We must not let this through, in
case we're doing NAT to the same network. */
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+ if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
struct icmphdr _hdr, *hp;
- hp = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4,
+ hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
sizeof(_hdr), &_hdr);
if (hp != NULL &&
hp->type == ICMP_REDIRECT)
@@ -122,7 +120,7 @@ nf_nat_fn(unsigned int hooknum,
switch (ctinfo) {
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+ if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(ct, ctinfo,
hooknum, pskb))
return NF_DROP;
@@ -177,11 +175,11 @@ nf_nat_in(unsigned int hooknum,
int (*okfn)(struct sk_buff *))
{
unsigned int ret;
- __be32 daddr = (*pskb)->nh.iph->daddr;
+ __be32 daddr = ip_hdr(*pskb)->daddr;
ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
- daddr != (*pskb)->nh.iph->daddr) {
+ daddr != ip_hdr(*pskb)->daddr) {
dst_release((*pskb)->dst);
(*pskb)->dst = NULL;
}
@@ -203,7 +201,7 @@ nf_nat_out(unsigned int hooknum,
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr) ||
- (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+ ip_hdrlen(*pskb) < sizeof(struct iphdr))
return NF_ACCEPT;
ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
@@ -236,7 +234,7 @@ nf_nat_local_fn(unsigned int hooknum,
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr) ||
- (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+ ip_hdrlen(*pskb) < sizeof(struct iphdr))
return NF_ACCEPT;
ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
@@ -245,14 +243,16 @@ nf_nat_local_fn(unsigned int hooknum,
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
- ct->tuplehash[!dir].tuple.src.u3.ip
-#ifdef CONFIG_XFRM
- || ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all
-#endif
- )
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
if (ip_route_me_harder(pskb, RTN_UNSPEC))
ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ else if (ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (ip_xfrm_me_harder(pskb))
+ ret = NF_DROP;
+#endif
}
return ret;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ae68a691e8cd..37ab5802ca08 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -87,19 +87,6 @@ static const struct file_operations sockstat_seq_fops = {
.release = single_release,
};
-static unsigned long
-fold_field(void *mib[], int offt)
-{
- unsigned long res = 0;
- int i;
-
- for_each_possible_cpu(i) {
- res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
- res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
- }
- return res;
-}
-
/* snmp items */
static const struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
@@ -266,8 +253,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) ip_statistics,
- snmp4_ipstats_list[i].entry));
+ snmp_fold_field((void **)ip_statistics,
+ snmp4_ipstats_list[i].entry));
seq_puts(seq, "\nIcmp:");
for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
@@ -276,8 +263,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nIcmp:");
for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) icmp_statistics,
- snmp4_icmp_list[i].entry));
+ snmp_fold_field((void **)icmp_statistics,
+ snmp4_icmp_list[i].entry));
seq_puts(seq, "\nTcp:");
for (i = 0; snmp4_tcp_list[i].name != NULL; i++)
@@ -288,12 +275,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
/* MaxConn field is signed, RFC 2012 */
if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
seq_printf(seq, " %ld",
- fold_field((void **) tcp_statistics,
- snmp4_tcp_list[i].entry));
+ snmp_fold_field((void **)tcp_statistics,
+ snmp4_tcp_list[i].entry));
else
seq_printf(seq, " %lu",
- fold_field((void **) tcp_statistics,
- snmp4_tcp_list[i].entry));
+ snmp_fold_field((void **)tcp_statistics,
+ snmp4_tcp_list[i].entry));
}
seq_puts(seq, "\nUdp:");
@@ -303,8 +290,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdp:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) udp_statistics,
- snmp4_udp_list[i].entry));
+ snmp_fold_field((void **)udp_statistics,
+ snmp4_udp_list[i].entry));
/* the UDP and UDP-Lite MIBs are the same */
seq_puts(seq, "\nUdpLite:");
@@ -314,8 +301,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdpLite:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) udplite_statistics,
- snmp4_udp_list[i].entry) );
+ snmp_fold_field((void **)udplite_statistics,
+ snmp4_udp_list[i].entry));
seq_putc(seq, '\n');
return 0;
@@ -348,8 +335,8 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nTcpExt:");
for (i = 0; snmp4_net_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) net_statistics,
- snmp4_net_list[i].entry));
+ snmp_fold_field((void **)net_statistics,
+ snmp4_net_list[i].entry));
seq_putc(seq, '\n');
return 0;
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index da70fef82c93..971ab9356e51 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -45,7 +45,7 @@
#include <net/ipip.h>
#include <linux/igmp.h>
-struct net_protocol *inet_protos[MAX_INET_PROTOS];
+struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
static DEFINE_SPINLOCK(inet_proto_lock);
/*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 87e9c1618100..24d7c9f31918 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -132,7 +132,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
return 1;
- type = skb->h.icmph->type;
+ type = icmp_hdr(skb)->type;
if (type < 32) {
__u32 data = raw_sk(sk)->filter.data;
@@ -184,8 +184,8 @@ out:
void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
{
struct inet_sock *inet = inet_sk(sk);
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
int err = 0;
int harderr = 0;
@@ -256,7 +256,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
}
nf_reset(skb);
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
raw_rcv_skb(sk, skb);
return 0;
@@ -291,11 +291,13 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
skb->priority = sk->sk_priority;
skb->dst = dst_clone(&rt->u.dst);
- skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ skb_put(skb, length);
skb->ip_summed = CHECKSUM_NONE;
- skb->h.raw = skb->nh.raw;
+ skb->transport_header = skb->network_header;
err = memcpy_fromiovecend((void *)iph, from, 0, length);
if (err)
goto error_fault;
@@ -613,7 +615,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/* Copy the address. */
if (sin) {
sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
}
@@ -887,7 +889,7 @@ static int raw_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations raw_seq_ops = {
+static const struct seq_operations raw_seq_ops = {
.start = raw_seq_start,
.next = raw_seq_next,
.stop = raw_seq_stop,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 37e0d4d5cf94..cb76e3c725a0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -82,7 +82,6 @@
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
@@ -104,6 +103,7 @@
#include <net/xfrm.h>
#include <net/ip_mp_alg.h>
#include <net/netevent.h>
+#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
@@ -364,7 +364,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations rt_cache_seq_ops = {
+static const struct seq_operations rt_cache_seq_ops = {
.start = rt_cache_seq_start,
.next = rt_cache_seq_next,
.stop = rt_cache_seq_stop,
@@ -470,7 +470,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations rt_cpu_seq_ops = {
+static const struct seq_operations rt_cpu_seq_ops = {
.start = rt_cpu_seq_start,
.next = rt_cpu_seq_next,
.stop = rt_cpu_seq_stop,
@@ -1519,7 +1519,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
static int ip_rt_bug(struct sk_buff *skb)
{
printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
- NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
+ NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
skb->dev ? skb->dev->name : "?");
kfree_skb(skb);
return 0;
@@ -1698,9 +1698,9 @@ static void ip_handle_martian_source(struct net_device *dev,
printk(KERN_WARNING "martian source %u.%u.%u.%u from "
"%u.%u.%u.%u, on dev %s\n",
NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
- if (dev->hard_header_len && skb->mac.raw) {
+ if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
int i;
- unsigned char *p = skb->mac.raw;
+ const unsigned char *p = skb_mac_header(skb);
printk(KERN_WARNING "ll header: ");
for (i = 0; i < dev->hard_header_len; i++, p++) {
printk("%02x", *p);
@@ -2134,7 +2134,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rcu_read_lock();
if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
int our = ip_check_mc(in_dev, daddr, saddr,
- skb->nh.iph->protocol);
+ ip_hdr(skb)->protocol);
if (our
#ifdef CONFIG_IP_MROUTE
|| (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
@@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
dev_out = ip_dev_find(oldflp->fl4_src);
- if (dev_out == NULL)
+ if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind))
goto out;
/* I removed check for oif == dev_out->oif here.
@@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
of another iface. --ANK
*/
- if (oldflp->oif == 0
+ if (dev_out && oldflp->oif == 0
&& (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
/* Special hack: user can direct multicasts
and limited broadcast via necessary interface
@@ -2683,7 +2683,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
id = rt->peer->ip_id_count;
if (rt->peer->tcp_ts_stamp) {
ts = rt->peer->tcp_ts;
- tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
+ tsage = get_seconds() - rt->peer->tcp_ts_stamp;
}
}
@@ -2721,7 +2721,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
@@ -2747,10 +2747,11 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
/* Reserve room for dummy headers, this skb can pass
through good chunk of routing engine.
*/
- skb->mac.raw = skb->nh.raw = skb->data;
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
- skb->nh.iph->protocol = IPPROTO_ICMP;
+ ip_hdr(skb)->protocol = IPPROTO_ICMP;
skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
@@ -3193,6 +3194,8 @@ int __init ip_rt_init(void)
xfrm_init();
xfrm4_init();
#endif
+ rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+
return rc;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 33016cc90f0b..2da1be0589a9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -125,10 +125,11 @@ static __u16 const msstab[] = {
__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
{
struct tcp_sock *tp = tcp_sk(sk);
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
int mssind;
const __u16 mss = *mssp;
-
tp->last_synq_overflow = jiffies;
/* XXX sort msstab[] by probability? Binary search? */
@@ -138,9 +139,8 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
- return secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr,
- skb->h.th->source, skb->h.th->dest,
- ntohl(skb->h.th->seq),
+ return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
+ th->source, th->dest, ntohl(th->seq),
jiffies / (HZ * 60), mssind);
}
@@ -157,14 +157,13 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
*/
static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
{
- __u32 seq;
- __u32 mssind;
-
- seq = ntohl(skb->h.th->seq)-1;
- mssind = check_tcp_syn_cookie(cookie,
- skb->nh.iph->saddr, skb->nh.iph->daddr,
- skb->h.th->source, skb->h.th->dest,
- seq, jiffies / (HZ * 60), COUNTER_TRIES);
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 seq = ntohl(th->seq) - 1;
+ __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
+ th->source, th->dest, seq,
+ jiffies / (HZ * 60),
+ COUNTER_TRIES);
return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
}
@@ -191,14 +190,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct tcp_sock *tp = tcp_sk(sk);
- __u32 cookie = ntohl(skb->h.th->ack_seq) - 1;
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
int mss;
struct rtable *rt;
__u8 rcv_wscale;
- if (!sysctl_tcp_syncookies || !skb->h.th->ack)
+ if (!sysctl_tcp_syncookies || !th->ack)
goto out;
if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
@@ -220,12 +220,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
}
ireq = inet_rsk(req);
treq = tcp_rsk(req);
- treq->rcv_isn = ntohl(skb->h.th->seq) - 1;
+ treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
req->mss = mss;
- ireq->rmt_port = skb->h.th->source;
- ireq->loc_addr = skb->nh.iph->daddr;
- ireq->rmt_addr = skb->nh.iph->saddr;
+ ireq->rmt_port = th->source;
+ ireq->loc_addr = ip_hdr(skb)->daddr;
+ ireq->rmt_addr = ip_hdr(skb)->saddr;
ireq->opt = NULL;
/* We throwed the options of the initial SYN away, so we hope
@@ -261,8 +261,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
.tos = RT_CONN_FLAGS(sk) } },
.proto = IPPROTO_TCP,
.uli_u = { .ports =
- { .sport = skb->h.th->dest,
- .dport = skb->h.th->source } } };
+ { .sport = th->dest,
+ .dport = th->source } } };
security_req_classify_flow(req, &fl);
if (ip_route_output_key(&rt, &fl)) {
reqsk_free(req);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0aa304711a96..6817d6485df5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -647,6 +647,14 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_dointvec
},
{
+ .ctl_name = NET_TCP_FRTO_RESPONSE,
+ .procname = "tcp_frto_response",
+ .data = &sysctl_tcp_frto_response,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
.ctl_name = NET_TCP_LOW_LATENCY,
.procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency,
@@ -803,6 +811,14 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_allowed_congestion_control,
.strategy = &strategy_allowed_congestion_control,
},
+ {
+ .ctl_name = NET_TCP_MAX_SSTHRESH,
+ .procname = "tcp_max_ssthresh",
+ .data = &sysctl_tcp_max_ssthresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 74c4d103ebc2..bd4c295f5d79 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -252,7 +252,6 @@
#include <linux/fcntl.h>
#include <linux/poll.h>
#include <linux/init.h>
-#include <linux/smp_lock.h>
#include <linux/fs.h>
#include <linux/random.h>
#include <linux/bootmem.h>
@@ -297,7 +296,7 @@ EXPORT_SYMBOL(tcp_sockets_allocated);
* All the sk_stream_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
-int tcp_memory_pressure;
+int tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL(tcp_memory_pressure);
@@ -425,7 +424,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
/* Subtract 1, if FIN is in queue. */
if (answ && !skb_queue_empty(&sk->sk_receive_queue))
answ -=
- ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin;
+ tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin;
} else
answ = tp->urg_seq - tp->copied_seq;
release_sock(sk);
@@ -444,7 +443,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
break;
default:
return -ENOIOCTLCMD;
- };
+ }
return put_user(answ, (int __user *)arg);
}
@@ -460,9 +459,9 @@ static inline int forced_push(struct tcp_sock *tp)
return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}
-static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
- struct sk_buff *skb)
+static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
skb->csum = 0;
@@ -470,10 +469,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
tcb->flags = TCPCB_FLAG_ACK;
tcb->sacked = 0;
skb_header_release(skb);
- __skb_queue_tail(&sk->sk_write_queue, skb);
+ tcp_add_write_queue_tail(sk, skb);
sk_charge_skb(sk, skb);
- if (!sk->sk_send_head)
- sk->sk_send_head = skb;
if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
}
@@ -488,15 +485,17 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
}
}
-static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags,
- int mss_now, int nonagle)
+static inline void tcp_push(struct sock *sk, int flags, int mss_now,
+ int nonagle)
{
- if (sk->sk_send_head) {
- struct sk_buff *skb = sk->sk_write_queue.prev;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tcp_send_head(sk)) {
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
if (!(flags & MSG_MORE) || forced_push(tp))
tcp_mark_push(tp, skb);
tcp_mark_urg(tp, flags, skb);
- __tcp_push_pending_frames(sk, tp, mss_now,
+ __tcp_push_pending_frames(sk, mss_now,
(flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
}
}
@@ -526,13 +525,13 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
goto do_error;
while (psize > 0) {
- struct sk_buff *skb = sk->sk_write_queue.prev;
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
struct page *page = pages[poffset / PAGE_SIZE];
int copy, i, can_coalesce;
int offset = poffset % PAGE_SIZE;
int size = min_t(size_t, psize, PAGE_SIZE - offset);
- if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
+ if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -542,7 +541,7 @@ new_segment:
if (!skb)
goto wait_for_memory;
- skb_entail(sk, tp, skb);
+ skb_entail(sk, skb);
copy = size_goal;
}
@@ -588,8 +587,8 @@ new_segment:
if (forced_push(tp)) {
tcp_mark_push(tp, skb);
- __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
- } else if (skb == sk->sk_send_head)
+ __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
+ } else if (skb == tcp_send_head(sk))
tcp_push_one(sk, mss_now);
continue;
@@ -597,7 +596,7 @@ wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
if (copied)
- tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+ tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
@@ -608,7 +607,7 @@ wait_for_memory:
out:
if (copied)
- tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+ tcp_push(sk, flags, mss_now, tp->nonagle);
return copied;
do_error:
@@ -639,8 +638,9 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
#define TCP_OFF(sk) (sk->sk_sndmsg_off)
-static inline int select_size(struct sock *sk, struct tcp_sock *tp)
+static inline int select_size(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
if (sk->sk_route_caps & NETIF_F_SG) {
@@ -704,9 +704,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
while (seglen > 0) {
int copy;
- skb = sk->sk_write_queue.prev;
+ skb = tcp_write_queue_tail(sk);
- if (!sk->sk_send_head ||
+ if (!tcp_send_head(sk) ||
(copy = size_goal - skb->len) <= 0) {
new_segment:
@@ -716,7 +716,7 @@ new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
+ skb = sk_stream_alloc_pskb(sk, select_size(sk),
0, sk->sk_allocation);
if (!skb)
goto wait_for_memory;
@@ -727,7 +727,7 @@ new_segment:
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
skb->ip_summed = CHECKSUM_PARTIAL;
- skb_entail(sk, tp, skb);
+ skb_entail(sk, skb);
copy = size_goal;
}
@@ -832,8 +832,8 @@ new_segment:
if (forced_push(tp)) {
tcp_mark_push(tp, skb);
- __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
- } else if (skb == sk->sk_send_head)
+ __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
+ } else if (skb == tcp_send_head(sk))
tcp_push_one(sk, mss_now);
continue;
@@ -841,7 +841,7 @@ wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
if (copied)
- tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+ tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
@@ -853,16 +853,18 @@ wait_for_memory:
out:
if (copied)
- tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+ tcp_push(sk, flags, mss_now, tp->nonagle);
TCP_CHECK_TIMER(sk);
release_sock(sk);
return copied;
do_fault:
if (!skb->len) {
- if (sk->sk_send_head == skb)
- sk->sk_send_head = NULL;
- __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(skb, sk);
+ /* It is the one place in all of TCP, except connection
+ * reset, where we can be unlinking the send_head.
+ */
+ tcp_check_send_head(sk, skb);
sk_stream_free_skb(sk, skb);
}
@@ -1016,9 +1018,9 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
skb_queue_walk(&sk->sk_receive_queue, skb) {
offset = seq - TCP_SKB_CB(skb)->seq;
- if (skb->h.th->syn)
+ if (tcp_hdr(skb)->syn)
offset--;
- if (offset < skb->len || skb->h.th->fin) {
+ if (offset < skb->len || tcp_hdr(skb)->fin) {
*off = offset;
return skb;
}
@@ -1070,7 +1072,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (offset != skb->len)
break;
}
- if (skb->h.th->fin) {
+ if (tcp_hdr(skb)->fin) {
sk_eat_skb(sk, skb, 0);
++seq;
break;
@@ -1174,11 +1176,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
break;
}
offset = *seq - TCP_SKB_CB(skb)->seq;
- if (skb->h.th->syn)
+ if (tcp_hdr(skb)->syn)
offset--;
if (offset < skb->len)
goto found_ok_skb;
- if (skb->h.th->fin)
+ if (tcp_hdr(skb)->fin)
goto found_fin_ok;
BUG_TRAP(flags & MSG_PEEK);
skb = skb->next;
@@ -1389,12 +1391,12 @@ do_prequeue:
skip_copy:
if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
tp->urg_data = 0;
- tcp_fast_path_check(sk, tp);
+ tcp_fast_path_check(sk);
}
if (used + offset < skb->len)
continue;
- if (skb->h.th->fin)
+ if (tcp_hdr(skb)->fin)
goto found_fin_ok;
if (!(flags & MSG_PEEK)) {
sk_eat_skb(sk, skb, copied_early);
@@ -1563,21 +1565,19 @@ void tcp_close(struct sock *sk, long timeout)
*/
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
- skb->h.th->fin;
+ tcp_hdr(skb)->fin;
data_was_unread += len;
__kfree_skb(skb);
}
sk_stream_mem_reclaim(sk);
- /* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
- * 3.10, we send a RST here because data was lost. To
- * witness the awful effects of the old behavior of always
- * doing a FIN, run an older 2.1.x kernel or 2.0.x, start
- * a bulk GET in an FTP client, suspend the process, wait
- * for the client to advertise a zero window, then kill -9
- * the FTP client, wheee... Note: timeout is always zero
- * in such a case.
+ /* As outlined in RFC 2525, section 2.17, we send a RST here because
+ * data was lost. To witness the awful effects of the old behavior of
+ * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
+ * GET in an FTP client, suspend the process, wait for the client to
+ * advertise a zero window, then kill -9 the FTP client, wheee...
+ * Note: timeout is always zero in such a case.
*/
if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
@@ -1732,7 +1732,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
- sk_stream_writequeue_purge(sk);
+ tcp_write_queue_purge(sk);
__skb_queue_purge(&tp->out_of_order_queue);
#ifdef CONFIG_NET_DMA
__skb_queue_purge(&sk->sk_async_wait_queue);
@@ -1758,9 +1758,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
- sk->sk_send_head = NULL;
- tp->rx_opt.saw_tstamp = 0;
- tcp_sack_reset(&tp->rx_opt);
+ tcp_init_send_head(sk);
+ memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
@@ -1830,7 +1829,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
* for currently queued segments.
*/
tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk, tp);
+ tcp_push_pending_frames(sk);
} else {
tp->nonagle &= ~TCP_NAGLE_OFF;
}
@@ -1854,7 +1853,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
tp->nonagle &= ~TCP_NAGLE_CORK;
if (tp->nonagle&TCP_NAGLE_OFF)
tp->nonagle |= TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk, tp);
+ tcp_push_pending_frames(sk);
}
break;
@@ -1954,7 +1953,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
default:
err = -ENOPROTOOPT;
break;
- };
+ }
+
release_sock(sk);
return err;
}
@@ -2124,7 +2124,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return 0;
default:
return -ENOPROTOOPT;
- };
+ }
if (put_user(len, optlen))
return -EFAULT;
@@ -2170,7 +2170,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
if (!pskb_may_pull(skb, sizeof(*th)))
goto out;
- th = skb->h.th;
+ th = tcp_hdr(skb);
thlen = th->doff * 4;
if (thlen < sizeof(*th))
goto out;
@@ -2210,7 +2210,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
delta = htonl(oldlen + (thlen + len));
skb = segs;
- th = skb->h.th;
+ th = tcp_hdr(skb);
seq = ntohl(th->seq);
do {
@@ -2219,23 +2219,25 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check = csum_fold(csum_partial(skb->h.raw, thlen,
- skb->csum));
+ th->check =
+ csum_fold(csum_partial(skb_transport_header(skb),
+ thlen, skb->csum));
seq += len;
skb = skb->next;
- th = skb->h.th;
+ th = tcp_hdr(skb);
th->seq = htonl(seq);
th->cwr = 0;
} while (skb->next);
- delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
+ delta = htonl(oldlen + (skb->tail - skb->transport_header) +
+ skb->data_len);
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check = csum_fold(csum_partial(skb->h.raw, thlen,
- skb->csum));
+ th->check = csum_fold(csum_partial(skb_transport_header(skb),
+ thlen, skb->csum));
out:
return segs;
@@ -2372,6 +2374,23 @@ void __tcp_put_md5sig_pool(void)
EXPORT_SYMBOL(__tcp_put_md5sig_pool);
#endif
+void tcp_done(struct sock *sk)
+{
+ if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+
+ tcp_set_state(sk, TCP_CLOSE);
+ tcp_clear_xmit_timers(sk);
+
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_state_change(sk);
+ else
+ inet_csk_destroy_sock(sk);
+}
+EXPORT_SYMBOL_GPL(tcp_done);
+
extern void __skb_cb_too_small_for_tcp(int, int);
extern struct tcp_congestion_ops tcp_reno;
@@ -2458,11 +2477,18 @@ void __init tcp_init(void)
sysctl_max_syn_backlog = 128;
}
- /* Allow no more than 3/4 kernel memory (usually less) allocated to TCP */
- sysctl_tcp_mem[0] = (1536 / sizeof (struct inet_bind_hashbucket)) << order;
- sysctl_tcp_mem[1] = sysctl_tcp_mem[0] * 4 / 3;
+ /* Set the pressure threshold to be a fraction of global memory that
+ * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
+ * memory, with a floor of 128 pages.
+ */
+ limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+ limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+ limit = max(limit, 128UL);
+ sysctl_tcp_mem[0] = limit / 4 * 3;
+ sysctl_tcp_mem[1] = limit;
sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
+ /* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
max_share = min(4UL*1024*1024, limit);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5730333cd0ac..281c9f913257 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 5c8caf4a1244..86b26539e54b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -12,6 +12,8 @@
#include <linux/list.h>
#include <net/tcp.h>
+int sysctl_tcp_max_ssthresh = 0;
+
static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
@@ -77,18 +79,19 @@ void tcp_init_congestion_control(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_congestion_ops *ca;
- if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
- return;
+ /* if no choice made yet assign the current value set as default */
+ if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
+ if (try_module_get(ca->owner)) {
+ icsk->icsk_ca_ops = ca;
+ break;
+ }
- rcu_read_lock();
- list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
- if (try_module_get(ca->owner)) {
- icsk->icsk_ca_ops = ca;
- break;
+ /* fallback to next available */
}
-
+ rcu_read_unlock();
}
- rcu_read_unlock();
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
@@ -123,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name)
#endif
if (ca) {
- ca->non_restricted = 1; /* default is always allowed */
+ ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
list_move(&ca->list, &tcp_cong_list);
ret = 0;
}
@@ -178,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
*buf = '\0';
rcu_read_lock();
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
- if (!ca->non_restricted)
+ if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
continue;
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
@@ -209,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val)
}
}
- /* pass 2 clear */
+ /* pass 2 clear old values */
list_for_each_entry_rcu(ca, &tcp_cong_list, list)
- ca->non_restricted = 0;
+ ca->flags &= ~TCP_CONG_NON_RESTRICTED;
/* pass 3 mark as allowed */
while ((name = strsep(&val, " ")) && *name) {
ca = tcp_ca_find(name);
WARN_ON(!ca);
if (ca)
- ca->non_restricted = 1;
+ ca->flags |= TCP_CONG_NON_RESTRICTED;
}
out:
spin_unlock(&tcp_cong_list_lock);
@@ -236,6 +239,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
rcu_read_lock();
ca = tcp_ca_find(name);
+
/* no change asking for existing value */
if (ca == icsk->icsk_ca_ops)
goto out;
@@ -252,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
if (!ca)
err = -ENOENT;
- else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
+ else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
err = -EPERM;
else if (!try_module_get(ca->owner))
@@ -261,7 +265,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
else {
tcp_cleanup_congestion_control(sk);
icsk->icsk_ca_ops = ca;
- if (icsk->icsk_ca_ops->init)
+
+ if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
}
out:
@@ -271,10 +276,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
/*
- * Linear increase during slow start
+ * Slow start (exponential increase) with
+ * RFC3742 Limited Slow Start (fast linear increase) support.
*/
void tcp_slow_start(struct tcp_sock *tp)
{
+ int cnt = 0;
+
if (sysctl_tcp_abc) {
/* RFC3465: Slow Start
* TCP sender SHOULD increase cwnd by the number of
@@ -283,17 +291,25 @@ void tcp_slow_start(struct tcp_sock *tp)
*/
if (tp->bytes_acked < tp->mss_cache)
return;
-
- /* We MAY increase by 2 if discovered delayed ack */
- if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- }
}
+
+ if (sysctl_tcp_max_ssthresh > 0 &&
+ tp->snd_cwnd > sysctl_tcp_max_ssthresh)
+ cnt += sysctl_tcp_max_ssthresh>>1;
+ else
+ cnt += tp->snd_cwnd;
+
+ /* RFC3465: We MAY increase by 2 if discovered delayed ack */
+ if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
+ cnt <<= 1;
tp->bytes_acked = 0;
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
+ tp->snd_cwnd_cnt += cnt;
+ while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ tp->snd_cwnd_cnt -= tp->snd_cwnd;
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
}
EXPORT_SYMBOL_GPL(tcp_slow_start);
@@ -355,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk)
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
struct tcp_congestion_ops tcp_reno = {
+ .flags = TCP_CONG_NON_RESTRICTED,
.name = "reno",
- .non_restricted = 1,
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 9a582fb4ef9f..14224487b16b 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,5 +1,5 @@
/*
- * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.1
*
* This is from the implementation of CUBIC TCP in
* Injong Rhee, Lisong Xu.
@@ -51,8 +51,6 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_
module_param(tcp_friendliness, int, 0644);
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
-#include <asm/div64.h>
-
/* BIC TCP Parameters */
struct bictcp {
u32 cnt; /* increase cwnd by 1 after ACKs */
@@ -93,50 +91,51 @@ static void bictcp_init(struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
- u_int32_t d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- /* avoid 64 bit division if possible */
- if (dividend >> 32)
- do_div(dividend, d);
- else
- dividend = (uint32_t) dividend / d;
-
- return dividend;
-}
-
-/*
- * calculate the cubic root of x using Newton-Raphson
+/* calculate the cubic root of x using a table lookup followed by one
+ * Newton-Raphson iteration.
+ * Avg err ~= 0.195%
*/
static u32 cubic_root(u64 a)
{
- u32 x, x1;
-
- /* Initial estimate is based on:
- * cbrt(x) = exp(log(x) / 3)
+ u32 x, b, shift;
+ /*
+ * cbrt(x) MSB values for x MSB values in [0..63].
+ * Precomputed then refined by hand - Willy Tarreau
+ *
+ * For x in [0..63],
+ * v = cbrt(x << 18) - 1
+ * cbrt(x) = (v[x] + 10) >> 6
*/
- x = 1u << (fls64(a)/3);
+ static const u8 v[] = {
+ /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118,
+ /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156,
+ /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179,
+ /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199,
+ /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215,
+ /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229,
+ /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242,
+ /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254,
+ };
+
+ b = fls64(a);
+ if (b < 7) {
+ /* a in [0..63] */
+ return ((u32)v[(u32)a] + 35) >> 6;
+ }
+
+ b = ((b * 84) >> 8) - 1;
+ shift = (a >> (b * 3));
+
+ x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
/*
- * Iteration based on:
+ * Newton-Raphson iteration
* 2
* x = ( 2 * x + a / x ) / 3
* k+1 k k
*/
- do {
- x1 = x;
- x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
- } while (abs(x1 - x) > 1);
-
+ x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
+ x = ((x * 341) >> 10);
return x;
}
@@ -215,7 +214,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
if (ca->delay_min > 0) {
/* max increment = Smax * rtt / 0.1 */
min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
- if (ca->cnt < min_cnt)
+
+ /* use concave growth when the target is above the origin */
+ if (ca->cnt < min_cnt && t >= ca->bic_K)
ca->cnt = min_cnt;
}
@@ -333,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -401,4 +402,4 @@ module_exit(cubictcp_unregister);
MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CUBIC TCP");
-MODULE_VERSION("2.0");
+MODULE_VERSION("2.1");
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index a291097fcc0a..43d624e5043c 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -97,10 +97,6 @@ struct hstcp {
u32 ai;
};
-static int max_ssthresh = 100;
-module_param(max_ssthresh, int, 0644);
-MODULE_PARM_DESC(max_ssthresh, "limited slow start threshold (RFC3742)");
-
static void hstcp_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -122,23 +118,9 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
if (!tcp_is_cwnd_limited(sk, in_flight))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
- /* RFC3742: limited slow start
- * the window is increased by 1/K MSS for each arriving ACK,
- * for K = int(cwnd/(0.5 max_ssthresh))
- */
- if (max_ssthresh > 0 && tp->snd_cwnd > max_ssthresh) {
- u32 k = max(tp->snd_cwnd / (max_ssthresh >> 1), 1U);
- if (++tp->snd_cwnd_cnt >= k) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- tp->snd_cwnd_cnt = 0;
- }
- } else {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- }
- } else {
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+ else {
/* Update AIMD parameters.
*
* We want to guarantee that:
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 1020eb48d8d1..4ba4a7ae0a85 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk)
}
}
-static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
+static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 59e691d26f64..e5be35117223 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -144,7 +144,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
ca->snd_cwnd_cents += odd;
/* check when fractions goes >=128 and increase cwnd by 1. */
- while(ca->snd_cwnd_cents >= 128) {
+ while (ca->snd_cwnd_cents >= 128) {
tp->snd_cwnd++;
ca->snd_cwnd_cents -= 128;
tp->snd_cwnd_cnt = 0;
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
new file mode 100644
index 000000000000..4adc47c55351
--- /dev/null
+++ b/net/ipv4/tcp_illinois.c
@@ -0,0 +1,356 @@
+/*
+ * TCP Illinois congestion control.
+ * Home page:
+ * http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+ *
+ * The algorithm is described in:
+ * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
+ * for High-Speed Networks"
+ * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf
+ *
+ * Implemented from description in paper and ns-2 simulation.
+ * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
+#include <asm/div64.h>
+#include <net/tcp.h>
+
+/* alpha (additive increase) is kept in fixed point, scaled by ALPHA_SCALE. */
+#define ALPHA_SHIFT 7
+#define ALPHA_SCALE (1u<<ALPHA_SHIFT)
+#define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */
+#define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */
+#define ALPHA_BASE ALPHA_SCALE /* 1.0 */
+#define U32_MAX ((u32)~0U)
+/* Largest RTT sample (usec) whose product with ALPHA_MAX still fits in a u32 */
+#define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */
+
+/* beta (multiplicative decrease) is kept in fixed point, scaled by BETA_SCALE. */
+#define BETA_SHIFT 6
+#define BETA_SCALE (1u<<BETA_SHIFT)
+#define BETA_MIN (BETA_SCALE/8) /* 0.125 */
+#define BETA_MAX (BETA_SCALE/2) /* 0.5 */
+/* Value used for small windows and after loss; equals BETA_MAX (0.5) */
+#define BETA_BASE BETA_MAX
+
+/* Below this cwnd, update_params() pins alpha and beta to their BASE values. */
+static int win_thresh __read_mostly = 15;
+module_param(win_thresh, int, 0);
+MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
+
+/* Number of low-delay updates alpha() counts before returning ALPHA_MAX again. */
+static int theta __read_mostly = 5;
+module_param(theta, int, 0);
+MODULE_PARM_DESC(theta, "# of fast RTT's before full growth");
+
+/* TCP Illinois per-connection state, accessed through inet_csk_ca(sk) */
+struct illinois {
+ u64 sum_rtt; /* sum of rtt samples (usec) since last rtt_reset() */
+ u16 cnt_rtt; /* # of rtt samples since last rtt_reset() */
+ u32 base_rtt; /* min of all rtt in usec */
+ u32 max_rtt; /* max of all rtt in usec */
+ u32 end_seq; /* right edge of current RTT */
+ u32 alpha; /* Additive increase, scaled by ALPHA_SCALE */
+ u32 beta; /* Multiplicative decrease, scaled by BETA_SCALE */
+ u16 acked; /* # packets acked by current ACK */
+ u8 rtt_above; /* average rtt has gone above threshold */
+ u8 rtt_low; /* # of rtt measurements below threshold */
+};
+
+/* Begin a new RTT measurement round that ends at the current snd_nxt. */
+static void rtt_reset(struct sock *sk)
+{
+	struct illinois *ca = inet_csk_ca(sk);
+
+	/* Discard the samples accumulated during the previous round. */
+	ca->sum_rtt = 0;
+	ca->cnt_rtt = 0;
+
+	/* tcp_illinois_cong_avoid() compares incoming ACKs against this edge. */
+	ca->end_seq = tcp_sk(sk)->snd_nxt;
+
+	/* TODO: age max_rtt? */
+}
+
+/* .init hook: put the per-connection Illinois state into its starting values. */
+static void tcp_illinois_init(struct sock *sk)
+{
+	struct illinois *ca = inet_csk_ca(sk);
+
+	/* No RTT history yet: the minimum starts high, the maximum starts at 0. */
+	ca->max_rtt = 0;
+	ca->base_rtt = 0x7fffffff;
+
+	/* Start with the largest additive increase and a 0.5 decrease factor. */
+	ca->beta = BETA_BASE;
+	ca->alpha = ALPHA_MAX;
+
+	ca->rtt_above = 0;
+	ca->rtt_low = 0;
+	ca->acked = 0;
+
+	rtt_reset(sk);
+}
+
+/* .pkts_acked hook: note how many packets were acked and take an RTT sample. */
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
+{
+	struct illinois *ca = inet_csk_ca(sk);
+	u32 sample = ktime_to_us(net_timedelta(last));
+
+	ca->acked = pkts_acked;
+
+	/* clamp oversized samples; this prevents wraparound in alpha math */
+	if (sample > RTT_MAX)
+		sample = RTT_MAX;
+
+	/* fold the sample into this round's running totals */
+	ca->sum_rtt += sample;
+	ca->cnt_rtt++;
+
+	/* widen the [base_rtt, max_rtt] range when the sample falls outside it */
+	if (sample > ca->max_rtt)
+		ca->max_rtt = sample;
+	if (sample < ca->base_rtt)
+		ca->base_rtt = sample;
+}
+
+/* Maximum queuing delay: largest RTT seen minus the smallest RTT seen. */
+static inline u32 max_delay(const struct illinois *ca)
+{
+	const u32 floor_rtt = ca->base_rtt;
+
+	return ca->max_rtt - floor_rtt;
+}
+
+/*
+ * Average queuing delay: mean RTT of the current round minus the smallest
+ * RTT seen.  cnt_rtt must be non-zero; update_params() checks this before
+ * calling.
+ */
+static inline u32 avg_delay(const struct illinois *ca)
+{
+	u64 mean_rtt = ca->sum_rtt;
+
+	/* do_div() stores the quotient back into its first argument */
+	do_div(mean_rtt, ca->cnt_rtt);
+	return mean_rtt - ca->base_rtt;
+}
+
+/*
+ * Choose the additive-increase factor from the measured queuing delay.
+ *
+ * @ca: connection state; rtt_above and rtt_low are updated here
+ * @da: average queuing delay
+ * @dm: maximum queuing delay
+ *
+ * Returns alpha in ALPHA_SCALE fixed point.
+ *
+ * With the low threshold d1 = dm / 100:
+ *  - da above d1: alpha falls from ALPHA_MAX (da just above d1) towards
+ *    ALPHA_MIN (da == dm), following
+ *
+ *               (dm - d1) amin amax
+ *      k1 = -------------------
+ *                 amax - amin
+ *
+ *               (dm - d1) amin
+ *      k2 = ---------------- - d1
+ *                amax - amin
+ *
+ *                 k1
+ *      alpha = ----------
+ *               k2 + da
+ *
+ *  - da at or below d1: ALPHA_MAX if the delay has never been above d1.
+ *    Otherwise the current ca->alpha is kept until theta low-delay calls
+ *    have been counted, so that one good RTT does not cause a sudden
+ *    window increase; then ALPHA_MAX is returned and the counters reset.
+ */
+static u32 alpha(struct illinois *ca, u32 da, u32 dm)
+{
+	const u32 d1 = dm / 100;	/* Low threshold */
+
+	if (da > d1) {
+		ca->rtt_above = 1;
+
+		/* shift both delays so that the threshold becomes the origin */
+		dm -= d1;
+		da -= d1;
+		return (dm * ALPHA_MAX) /
+			(dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
+	}
+
+	/* If never got out of low delay zone, then use max */
+	if (!ca->rtt_above)
+		return ALPHA_MAX;
+
+	/* Still counting low-delay calls: hold the previous value */
+	if (++ca->rtt_low < theta)
+		return ca->alpha;
+
+	ca->rtt_low = 0;
+	ca->rtt_above = 0;
+	return ALPHA_MAX;
+}
+
+/*
+ * Choose the multiplicative-decrease factor from the queuing delay.
+ *
+ * @da: average queuing delay
+ * @dm: maximum queuing delay
+ *
+ * Returns beta in BETA_SCALE fixed point:
+ *  - da <= 10% of dm:  BETA_MIN (1/8)
+ *  - da >= 80% of dm:  BETA_MAX (1/2)
+ *  - in between:       the straight line through those two points,
+ *
+ *                bmin d3 - bmax d2 + (bmax - bmin) da
+ *        b = ----------------------------------------
+ *                             d3 - d2
+ *
+ * If the two thresholds collapse (d3 <= d2) BETA_MAX is returned, which
+ * also keeps the divisor below from being zero.
+ */
+static u32 beta(u32 da, u32 dm)
+{
+	const u32 low = dm / 10;		/* d2 */
+	const u32 high = (8 * dm) / 10;		/* d3 */
+	u32 num;
+
+	if (da <= low)
+		return BETA_MIN;
+
+	if (da >= high || high <= low)
+		return BETA_MAX;
+
+	num = BETA_MIN * high - BETA_MAX * low + (BETA_MAX - BETA_MIN) * da;
+	return num / (high - low);
+}
+
+/*
+ * Recompute alpha and beta, then start a new RTT measurement round.
+ * Called from tcp_illinois_cong_avoid() when an ACK passes ca->end_seq.
+ */
+static void update_params(struct sock *sk)
+{
+	struct illinois *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (tp->snd_cwnd < win_thresh) {
+		/* small window: use the fixed base values */
+		ca->beta = BETA_BASE;
+		ca->alpha = ALPHA_BASE;
+	} else if (ca->cnt_rtt > 0) {
+		/* adapt only if at least one RTT sample was collected */
+		const u32 avg = avg_delay(ca);
+		const u32 peak = max_delay(ca);
+
+		ca->alpha = alpha(ca, avg, peak);
+		ca->beta = beta(avg, peak);
+	}
+
+	rtt_reset(sk);
+}
+
+/*
+ * .set_state hook.  Entering TCP_CA_Loss puts alpha and beta back to their
+ * base values, clears the delay-zone tracking and restarts RTT sampling;
+ * every other state change is ignored.
+ */
+static void tcp_illinois_state(struct sock *sk, u8 new_state)
+{
+	struct illinois *ca;
+
+	if (new_state != TCP_CA_Loss)
+		return;
+
+	ca = inet_csk_ca(sk);
+	ca->rtt_above = 0;
+	ca->rtt_low = 0;
+	ca->beta = BETA_BASE;
+	ca->alpha = ALPHA_BASE;
+	rtt_reset(sk);
+}
+
+/*
+ * .cong_avoid hook: grow the congestion window on a successful ACK.
+ *
+ * Once per measurement round (ACK beyond ca->end_seq) alpha and beta are
+ * refreshed.  In slow start the generic tcp_slow_start() is used;
+ * otherwise cwnd grows by roughly alpha / cwnd per acked packet.
+ */
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+				    u32 in_flight, int flag)
+{
+	struct illinois *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 credit;
+
+	if (after(ack, ca->end_seq))
+		update_params(sk);
+
+	/* RFC2861 only increase cwnd if fully utilized */
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
+	if (tp->snd_cwnd <= tp->snd_ssthresh) {
+		/* In slow start */
+		tcp_slow_start(tp);
+		return;
+	}
+
+	/* snd_cwnd_cnt is # of packets since last cwnd increment */
+	tp->snd_cwnd_cnt += ca->acked;
+	ca->acked = 1;
+
+	/* This is close approximation of:
+	 * tp->snd_cwnd += alpha/tp->snd_cwnd
+	 */
+	credit = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
+	if (credit < tp->snd_cwnd)
+		return;
+
+	tp->snd_cwnd = min(tp->snd_cwnd + credit / tp->snd_cwnd,
+			   (u32) tp->snd_cwnd_clamp);
+	tp->snd_cwnd_cnt = 0;
+}
+
+/*
+ * .ssthresh hook: slow start threshold to use after a loss.
+ *
+ * beta is the fraction the window is decreased *by* (BETA_MIN = 1/8 when
+ * the queuing delay is small, BETA_MAX = 1/2 when it is large), so the
+ * result is cwnd - beta * cwnd, not beta * cwnd.  With beta == BETA_BASE
+ * (1/2) both forms agree.
+ *
+ * beta never exceeds BETA_MAX (1/2), so the subtraction cannot underflow.
+ * The result is never below 2 segments.
+ */
+static u32 tcp_illinois_ssthresh(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+	u32 decrease = (tp->snd_cwnd * ca->beta) >> BETA_SHIFT;
+
+	/* Multiplicative decrease */
+	return max(tp->snd_cwnd - decrease, 2U);
+}
+
+
+/*
+ * .get_info hook: report RTT statistics in a struct tcpvegas_info netlink
+ * attribute when the INET_DIAG_VEGASINFO extension is requested in @ext.
+ *
+ * tcpv_rtt is the mean of the RTT samples of the current round.  It is
+ * left at 0 when no sample has been taken yet (cnt_rtt == 0, e.g. right
+ * after rtt_reset()), which avoids dividing by zero.
+ */
+static void tcp_illinois_info(struct sock *sk, u32 ext,
+			      struct sk_buff *skb)
+{
+	const struct illinois *ca = inet_csk_ca(sk);
+
+	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+		struct tcpvegas_info info = {
+			.tcpv_enabled = 1,
+			.tcpv_rttcnt = ca->cnt_rtt,
+			.tcpv_minrtt = ca->base_rtt,
+		};
+
+		if (ca->cnt_rtt > 0) {
+			u64 t = ca->sum_rtt;
+
+			do_div(t, ca->cnt_rtt);
+			info.tcpv_rtt = t;
+		}
+
+		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+	}
+}
+
+/* Congestion control operations registered under the name "illinois". */
+static struct tcp_congestion_ops tcp_illinois = {
+	.name		= "illinois",
+	.owner		= THIS_MODULE,
+	.flags		= TCP_CONG_RTT_STAMP,
+
+	.init		= tcp_illinois_init,
+	.set_state	= tcp_illinois_state,
+	.pkts_acked	= tcp_illinois_acked,
+	.cong_avoid	= tcp_illinois_cong_avoid,
+	.ssthresh	= tcp_illinois_ssthresh,
+	.min_cwnd	= tcp_reno_min_cwnd,
+	.get_info	= tcp_illinois_info,
+};
+
+/* Module init: register the "illinois" ops; returns the registration result. */
+static int __init tcp_illinois_register(void)
+{
+ /* compile-time check that struct illinois does not exceed ICSK_CA_PRIV_SIZE */
+ BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
+ return tcp_register_congestion_control(&tcp_illinois);
+}
+
+/* Module exit: unregister the "illinois" ops registered at init time. */
+static void __exit tcp_illinois_unregister(void)
+{
+ tcp_unregister_congestion_control(&tcp_illinois);
+}
+
+/* Entry and exit points of the module, followed by its metadata. */
+module_init(tcp_illinois_register);
+module_exit(tcp_illinois_unregister);
+
+MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TCP Illinois");
+MODULE_VERSION("1.0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1a14191687ac..7641b2761a14 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_nometrics_save __read_mostly;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
@@ -100,6 +101,7 @@ int sysctl_tcp_abc __read_mostly;
#define FLAG_ECE 0x40 /* ECE in this ACK */
#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
+#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -110,6 +112,8 @@ int sysctl_tcp_abc __read_mostly;
#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
+#define IsSackFrto() (sysctl_tcp_frto == 0x2)
+
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
/* Adapt the MSS value used to make delayed ack decision to the
@@ -136,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
*
* "len" is invariant segment length, including TCP header.
*/
- len += skb->data - skb->h.raw;
+ len += skb->data - skb_transport_header(skb);
if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
/* If PSH is not set, packet should be
* full sized, provided peer TCP is not badly broken.
@@ -144,7 +148,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
* to handle super-low mtu links fairly.
*/
(len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
- !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) {
+ !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
/* Subtract also invariant (if peer is RFC compliant),
* tcp header plus fixed timestamp option length.
* Resulting "len" is MSS free of SACK jitter.
@@ -231,9 +235,9 @@ static void tcp_fixup_sndbuf(struct sock *sk)
*/
/* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
- const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
int truesize = tcp_win_from_space(skb->truesize)/2;
int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
@@ -248,9 +252,11 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
return 0;
}
-static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+static void tcp_grow_window(struct sock *sk,
struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
/* Check #1 */
if (tp->rcv_ssthresh < tp->window_clamp &&
(int)tp->rcv_ssthresh < tcp_space(sk) &&
@@ -263,7 +269,7 @@ static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
if (tcp_win_from_space(skb->truesize) <= skb->len)
incr = 2*tp->advmss;
else
- incr = __tcp_grow_window(sk, tp, skb);
+ incr = __tcp_grow_window(sk, skb);
if (incr) {
tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
@@ -326,8 +332,9 @@ static void tcp_init_buffer_space(struct sock *sk)
}
/* 5. Recalculate window clamp after socket hit its memory bounds. */
-static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
+static void tcp_clamp_window(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
icsk->icsk_ack.quick = 0;
@@ -499,8 +506,9 @@ new_measure:
* each ACK we send, he increments snd_cwnd and transmits more of his
* queue. -DaveM
*/
-static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
u32 now;
@@ -541,7 +549,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
TCP_ECN_check_ce(tp, skb);
if (skb->len >= 128)
- tcp_grow_window(sk, tp, skb);
+ tcp_grow_window(sk, skb);
}
/* Called to compute a smoothed rtt estimate. The data fed to this
@@ -574,7 +582,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
* does not matter how to _calculate_ it. Seems, it was trap
* that VJ failed to avoid. 8)
*/
- if(m == 0)
+ if (m == 0)
m = 1;
if (tp->srtt != 0) {
m -= (tp->srtt >> 3); /* m is now error in rtt est */
@@ -759,15 +767,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
}
/* Set slow start threshold and cwnd not falling to slow start */
-void tcp_enter_cwr(struct sock *sk)
+void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
{
struct tcp_sock *tp = tcp_sk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
tp->prior_ssthresh = 0;
tp->bytes_acked = 0;
- if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+ if (icsk->icsk_ca_state < TCP_CA_CWR) {
tp->undo_marker = 0;
- tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+ if (set_ssthresh)
+ tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tp->snd_cwnd = min(tp->snd_cwnd,
tcp_packets_in_flight(tp) + 1U);
tp->snd_cwnd_cnt = 0;
@@ -934,7 +944,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
+ unsigned char *ptr = (skb_transport_header(ack_skb) +
+ TCP_SKB_CB(ack_skb)->sacked);
struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
struct sk_buff *cached_skb;
int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
@@ -1038,7 +1049,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
cached_skb = tp->fastpath_skb_hint;
cached_fack_count = tp->fastpath_cnt_hint;
if (!cached_skb) {
- cached_skb = sk->sk_write_queue.next;
+ cached_skb = tcp_write_queue_head(sk);
cached_fack_count = 0;
}
@@ -1055,10 +1066,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (after(end_seq, tp->high_seq))
flag |= FLAG_DATA_LOST;
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
int in_sack, pcount;
u8 sacked;
+ if (skb == tcp_send_head(sk))
+ break;
+
cached_skb = skb;
cached_fack_count = fack_count;
if (i == first_sack_index) {
@@ -1159,6 +1173,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
/* clear lost hint */
tp->retransmit_skb_hint = NULL;
}
+ /* SACK enhanced F-RTO detection.
+ * Set flag if and only if non-rexmitted
+ * segments below frto_highmark are
+ * SACKed (RFC4138; Appendix B).
+ * Clearing correct due to in-order walk
+ */
+ if (after(end_seq, tp->frto_highmark)) {
+ flag &= ~FLAG_ONLY_ORIG_SACKED;
+ } else {
+ if (!(sacked & TCPCB_RETRANS))
+ flag |= FLAG_ONLY_ORIG_SACKED;
+ }
}
TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
@@ -1195,7 +1221,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) {
struct sk_buff *skb;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
break;
if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
@@ -1224,7 +1252,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
tp->left_out = tp->sacked_out + tp->lost_out;
- if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss)
+ if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
+ (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
#if FASTRETRANS_DEBUG > 0
@@ -1236,9 +1265,49 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
return flag;
}
-/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
- * segments to see from the next ACKs whether any data was really missing.
- * If the RTO was spurious, new ACKs should arrive.
+/*
+ * Decide whether F-RTO may be used.  Returns 1 if so, 0 otherwise.
+ *
+ * F-RTO is refused when the sysctl is off.  The SACK enhanced variant
+ * (Appendix B of RFC4138) is always accepted because it is more robust.
+ * Basic F-RTO is accepted only if nothing other than the head of the
+ * write queue has been retransmitted.
+ */
+int tcp_use_frto(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	u8 sacked;
+
+	if (!sysctl_tcp_frto)
+		return 0;
+	if (IsSackFrto())
+		return 1;
+
+	/* Avoid expensive walking of rexmit queue if possible */
+	if (tp->retrans_out > 1)
+		return 0;
+
+	/* Begin with the segment that follows the head */
+	skb = tcp_write_queue_next(sk, tcp_write_queue_head(sk));
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
+
+		sacked = TCP_SKB_CB(skb)->sacked;
+		if (sacked & TCPCB_RETRANS)
+			return 0;
+		/* Short-circuit when first non-SACKed skb has been checked */
+		if (!(sacked & TCPCB_SACKED_ACKED))
+			break;
+	}
+	return 1;
+}
+
+/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
+ * recovery a bit and use heuristics in tcp_process_frto() to detect if
+ * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
+ * keep retrans_out counting accurate (with SACK F-RTO, other than head
+ * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
+ * bits are handled if the Loss state is really to be entered (in
+ * tcp_enter_frto_loss).
+ *
+ * Do like tcp_enter_loss() would; when RTO expires the second time it
+ * does:
+ * "Reduce ssthresh if it has not yet been made inside this window."
*/
void tcp_enter_frto(struct sock *sk)
{
@@ -1246,39 +1315,69 @@ void tcp_enter_frto(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- tp->frto_counter = 1;
-
- if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+ if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
tp->snd_una == tp->high_seq ||
- (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+ ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
+ !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+ /* Our state is too optimistic in ssthresh() call because cwnd
+ * is not reduced until tcp_enter_frto_loss() when previous FRTO
+ * recovery has not yet completed. Pattern would be this: RTO,
+ * Cumulative ACK, RTO (2xRTO for the same segment does not end
+ * up here twice).
+ * RFC4138 should be more specific on what to do, even though
+ * RTO is quite unlikely to occur after the first Cumulative ACK
+ * due to back-off and complexity of triggering events ...
+ */
+ if (tp->frto_counter) {
+ u32 stored_cwnd;
+ stored_cwnd = tp->snd_cwnd;
+ tp->snd_cwnd = 2;
+ tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+ tp->snd_cwnd = stored_cwnd;
+ } else {
+ tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+ }
+ /* ... in theory, cong.control module could do "any tricks" in
+ * ssthresh(), which means that ca_state, lost bits and lost_out
+ * counter would have to be faked before the call occurs. We
+ * consider that too expensive, unlikely and hacky, so modules
+ * using these in ssthresh() must deal with these incompatibility
+ * issues if they receive CA_EVENT_FRTO and frto_counter != 0
+ */
tcp_ca_event(sk, CA_EVENT_FRTO);
}
- /* Have to clear retransmission markers here to keep the bookkeeping
- * in shape, even though we are not yet in Loss state.
- * If something was really lost, it is eventually caught up
- * in tcp_enter_frto_loss.
- */
- tp->retrans_out = 0;
tp->undo_marker = tp->snd_una;
tp->undo_retrans = 0;
- sk_stream_for_retrans_queue(skb, sk) {
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
+ skb = tcp_write_queue_head(sk);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+ tp->retrans_out -= tcp_skb_pcount(skb);
}
tcp_sync_left_out(tp);
- tcp_set_ca_state(sk, TCP_CA_Open);
- tp->frto_highmark = tp->snd_nxt;
+ /* Earlier loss recovery underway (see RFC4138; Appendix B).
+ * The last condition is necessary at least in tp->frto_counter case.
+ */
+ if (IsSackFrto() && (tp->frto_counter ||
+ ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
+ after(tp->high_seq, tp->snd_una)) {
+ tp->frto_highmark = tp->high_seq;
+ } else {
+ tp->frto_highmark = tp->snd_nxt;
+ }
+ tcp_set_ca_state(sk, TCP_CA_Disorder);
+ tp->high_seq = tp->snd_nxt;
+ tp->frto_counter = 1;
}
/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
* which indicates that we should follow the traditional RTO recovery,
* i.e. mark everything lost and do go-back-N retransmission.
*/
-static void tcp_enter_frto_loss(struct sock *sk)
+static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -1287,10 +1386,23 @@ static void tcp_enter_frto_loss(struct sock *sk)
tp->sacked_out = 0;
tp->lost_out = 0;
tp->fackets_out = 0;
+ tp->retrans_out = 0;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
cnt += tcp_skb_pcount(skb);
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+ /*
+ * Count the retransmission made on RTO correctly (only when
+ * waiting for the first ACK and did not get it)...
+ */
+ if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
+ tp->retrans_out += tcp_skb_pcount(skb);
+ /* ...enter this if branch just for the first segment */
+ flag |= FLAG_DATA_ACKED;
+ } else {
+ TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+ }
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
/* Do not mark those segments lost that were
@@ -1308,7 +1420,7 @@ static void tcp_enter_frto_loss(struct sock *sk)
}
tcp_sync_left_out(tp);
- tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
+ tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->undo_marker = 0;
@@ -1366,7 +1478,9 @@ void tcp_enter_loss(struct sock *sk, int how)
if (!how)
tp->undo_marker = tp->snd_una;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
cnt += tcp_skb_pcount(skb);
if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
tp->undo_marker = 0;
@@ -1401,14 +1515,14 @@ static int tcp_check_sack_reneging(struct sock *sk)
* receiver _host_ is heavily congested (or buggy).
* Do processing similar to RTO timeout.
*/
- if ((skb = skb_peek(&sk->sk_write_queue)) != NULL &&
+ if ((skb = tcp_write_queue_head(sk)) != NULL &&
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
struct inet_connection_sock *icsk = inet_csk(sk);
NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
tcp_enter_loss(sk, 1);
icsk->icsk_retransmits++;
- tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+ tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
icsk->icsk_rto, TCP_RTO_MAX);
return 1;
@@ -1426,10 +1540,12 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
}
-static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
+static inline int tcp_head_timedout(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
return tp->packets_out &&
- tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue));
+ tcp_skb_timedout(sk, tcp_write_queue_head(sk));
}
/* Linux NewReno/SACK/FACK/ECN state machine.
@@ -1525,10 +1641,15 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
* Main question: may we further continue forward transmission
* with the same cwnd?
*/
-static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
+static int tcp_time_to_recover(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
__u32 packets_out;
+ /* Do not perform any recovery during FRTO algorithm */
+ if (tp->frto_counter)
+ return 0;
+
/* Trick#1: The loss is proven. */
if (tp->lost_out)
return 1;
@@ -1540,7 +1661,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
/* Trick#3 : when we use RFC2988 timer restart, fast
* retransmit can be triggered by timeout of queue head.
*/
- if (tcp_head_timedout(sk, tp))
+ if (tcp_head_timedout(sk))
return 1;
/* Trick#4: It is still not OK... But will it be useful to delay
@@ -1549,7 +1670,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
packets_out = tp->packets_out;
if (packets_out <= tp->reordering &&
tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
- !tcp_may_send_now(sk, tp)) {
+ !tcp_may_send_now(sk)) {
/* We have nothing to send. This connection is limited
* either by receiver window or by application.
*/
@@ -1589,8 +1710,10 @@ static void tcp_add_reno_sack(struct sock *sk)
/* Account for ACK, ACKing some data in Reno Recovery phase. */
-static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acked)
+static void tcp_remove_reno_sacks(struct sock *sk, int acked)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
if (acked-1 >= tp->sacked_out)
@@ -1609,9 +1732,10 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
}
/* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
+static void tcp_mark_head_lost(struct sock *sk,
int packets, u32 high_seq)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int cnt;
@@ -1620,11 +1744,13 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
skb = tp->lost_skb_hint;
cnt = tp->lost_cnt_hint;
} else {
- skb = sk->sk_write_queue.next;
+ skb = tcp_write_queue_head(sk);
cnt = 0;
}
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
/* TODO: do this better */
/* this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
@@ -1638,12 +1764,11 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
/* clear xmit_retransmit_queue hints
* if this is beyond hint */
- if(tp->retransmit_skb_hint != NULL &&
- before(TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
-
+ if (tp->retransmit_skb_hint != NULL &&
+ before(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
tp->retransmit_skb_hint = NULL;
- }
+
}
}
tcp_sync_left_out(tp);
@@ -1651,15 +1776,17 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
/* Account newly detected lost packet(s) */
-static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
+static void tcp_update_scoreboard(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (IsFack(tp)) {
int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
- tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
+ tcp_mark_head_lost(sk, lost, tp->high_seq);
} else {
- tcp_mark_head_lost(sk, tp, 1, tp->high_seq);
+ tcp_mark_head_lost(sk, 1, tp->high_seq);
}
/* New heuristics: it is possible only after we switched
@@ -1667,13 +1794,15 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
* Hence, we can detect timed out packets during fast
* retransmit without falling to slow start.
*/
- if (!IsReno(tp) && tcp_head_timedout(sk, tp)) {
+ if (!IsReno(tp) && tcp_head_timedout(sk)) {
struct sk_buff *skb;
skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
- : sk->sk_write_queue.next;
+ : tcp_write_queue_head(sk);
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
if (!tcp_skb_timedout(sk, skb))
break;
@@ -1745,9 +1874,11 @@ static inline int tcp_packet_delayed(struct tcp_sock *tp)
/* Undo procedures. */
#if FASTRETRANS_DEBUG > 1
-static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
+static void DBGUNDO(struct sock *sk, const char *msg)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
+
printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
msg,
NIPQUAD(inet->daddr), ntohs(inet->dport),
@@ -1793,13 +1924,15 @@ static inline int tcp_may_undo(struct tcp_sock *tp)
}
/* People celebrate: "We love our President!" */
-static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_recovery(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tcp_may_undo(tp)) {
/* Happy end! We did not retransmit anything
* or our original transmission succeeded.
*/
- DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
+ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
tcp_undo_cwr(sk, 1);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
@@ -1819,10 +1952,12 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
}
/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
-static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
+static void tcp_try_undo_dsack(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tp->undo_marker && !tp->undo_retrans) {
- DBGUNDO(sk, tp, "D-SACK");
+ DBGUNDO(sk, "D-SACK");
tcp_undo_cwr(sk, 1);
tp->undo_marker = 0;
NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
@@ -1831,9 +1966,9 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
/* Undo during fast recovery after partial ACK. */
-static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
- int acked)
+static int tcp_try_undo_partial(struct sock *sk, int acked)
{
+ struct tcp_sock *tp = tcp_sk(sk);
/* Partial ACK arrived. Force Hoe's retransmit. */
int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
@@ -1846,7 +1981,7 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
- DBGUNDO(sk, tp, "Hoe");
+ DBGUNDO(sk, "Hoe");
tcp_undo_cwr(sk, 0);
NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
@@ -1860,17 +1995,21 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
}
/* Undo during loss recovery after partial ACK. */
-static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_loss(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tcp_may_undo(tp)) {
struct sk_buff *skb;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
clear_all_retrans_hints(tp);
- DBGUNDO(sk, tp, "partial loss");
+ DBGUNDO(sk, "partial loss");
tp->lost_out = 0;
tp->left_out = tp->sacked_out;
tcp_undo_cwr(sk, 1);
@@ -1892,15 +2031,17 @@ static inline void tcp_complete_cwr(struct sock *sk)
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
-static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
tp->left_out = tp->sacked_out;
if (tp->retrans_out == 0)
tp->retrans_stamp = 0;
if (flag&FLAG_ECE)
- tcp_enter_cwr(sk);
+ tcp_enter_cwr(sk, 1);
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
int state = TCP_CA_Open;
@@ -1987,7 +2128,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
before(tp->snd_una, tp->high_seq) &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+ tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
@@ -1997,14 +2138,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* E. Check state exit conditions. State can be terminated
* when high_seq is ACKed. */
if (icsk->icsk_ca_state == TCP_CA_Open) {
- if (!sysctl_tcp_frto)
- BUG_TRAP(tp->retrans_out == 0);
+ BUG_TRAP(tp->retrans_out == 0);
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
switch (icsk->icsk_ca_state) {
case TCP_CA_Loss:
icsk->icsk_retransmits = 0;
- if (tcp_try_undo_recovery(sk, tp))
+ if (tcp_try_undo_recovery(sk))
return;
break;
@@ -2018,7 +2158,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
break;
case TCP_CA_Disorder:
- tcp_try_undo_dsack(sk, tp);
+ tcp_try_undo_dsack(sk);
if (!tp->undo_marker ||
/* For SACK case do not Open to allow to undo
* catching for all duplicate ACKs. */
@@ -2031,7 +2171,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
case TCP_CA_Recovery:
if (IsReno(tp))
tcp_reset_reno_sack(tp);
- if (tcp_try_undo_recovery(sk, tp))
+ if (tcp_try_undo_recovery(sk))
return;
tcp_complete_cwr(sk);
break;
@@ -2047,14 +2187,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
} else {
int acked = prior_packets - tp->packets_out;
if (IsReno(tp))
- tcp_remove_reno_sacks(sk, tp, acked);
- is_dupack = tcp_try_undo_partial(sk, tp, acked);
+ tcp_remove_reno_sacks(sk, acked);
+ is_dupack = tcp_try_undo_partial(sk, acked);
}
break;
case TCP_CA_Loss:
if (flag&FLAG_DATA_ACKED)
icsk->icsk_retransmits = 0;
- if (!tcp_try_undo_loss(sk, tp)) {
+ if (!tcp_try_undo_loss(sk)) {
tcp_moderate_cwnd(tp);
tcp_xmit_retransmit_queue(sk);
return;
@@ -2071,10 +2211,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
}
if (icsk->icsk_ca_state == TCP_CA_Disorder)
- tcp_try_undo_dsack(sk, tp);
+ tcp_try_undo_dsack(sk);
- if (!tcp_time_to_recover(sk, tp)) {
- tcp_try_to_open(sk, tp, flag);
+ if (!tcp_time_to_recover(sk)) {
+ tcp_try_to_open(sk, flag);
return;
}
@@ -2113,8 +2253,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
- if (is_dupack || tcp_head_timedout(sk, tp))
- tcp_update_scoreboard(sk, tp);
+ if (is_dupack || tcp_head_timedout(sk))
+ tcp_update_scoreboard(sk);
tcp_cwnd_down(sk);
tcp_xmit_retransmit_queue(sk);
}
@@ -2190,8 +2330,10 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
* RFC2988 recommends to restart timer to now+rto.
*/
-static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
+static void tcp_ack_packets_out(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (!tp->packets_out) {
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
} else {
@@ -2255,14 +2397,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
return acked;
}
-static u32 tcp_usrtt(struct timeval *tv)
-{
- struct timeval now;
-
- do_gettimeofday(&now);
- return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
-}
-
/* Remove acknowledged frames from the retransmission queue. */
static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
{
@@ -2273,12 +2407,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
int acked = 0;
__s32 seq_rtt = -1;
u32 pkts_acked = 0;
- void (*rtt_sample)(struct sock *sk, u32 usrtt)
- = icsk->icsk_ca_ops->rtt_sample;
- struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
+ ktime_t last_ackt = ktime_set(0,0);
- while ((skb = skb_peek(&sk->sk_write_queue)) &&
- skb != sk->sk_send_head) {
+ while ((skb = tcp_write_queue_head(sk)) &&
+ skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
__u8 sacked = scb->sacked;
@@ -2318,13 +2450,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
if (sacked) {
if (sacked & TCPCB_RETRANS) {
- if(sacked & TCPCB_SACKED_RETRANS)
+ if (sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out -= tcp_skb_pcount(skb);
acked |= FLAG_RETRANS_DATA_ACKED;
seq_rtt = -1;
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- skb_get_timestamp(skb, &tv);
+ last_ackt = skb->tstamp;
}
if (sacked & TCPCB_SACKED_ACKED)
tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2337,23 +2469,24 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- skb_get_timestamp(skb, &tv);
+ last_ackt = skb->tstamp;
}
tcp_dec_pcount_approx(&tp->fackets_out, skb);
tcp_packets_out_dec(tp, skb);
- __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(skb, sk);
sk_stream_free_skb(sk, skb);
clear_all_retrans_hints(tp);
}
if (acked&FLAG_ACKED) {
+ const struct tcp_congestion_ops *ca_ops
+ = inet_csk(sk)->icsk_ca_ops;
+
tcp_ack_update_rtt(sk, acked, seq_rtt);
- tcp_ack_packets_out(sk, tp);
- if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
- (*rtt_sample)(sk, tcp_usrtt(&tv));
+ tcp_ack_packets_out(sk);
- if (icsk->icsk_ca_ops->pkts_acked)
- icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
+ if (ca_ops->pkts_acked)
+ ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
}
#if FASTRETRANS_DEBUG > 0
@@ -2390,7 +2523,7 @@ static void tcp_ack_probe(struct sock *sk)
/* Was it a usable window open? */
- if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+ if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
tp->snd_una + tp->snd_wnd)) {
icsk->icsk_backoff = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
@@ -2433,13 +2566,14 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack
* Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
* and in FreeBSD. NetBSD's one is even worse.) is wrong.
*/
-static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
- struct sk_buff *skb, u32 ack, u32 ack_seq)
+static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+ u32 ack_seq)
{
+ struct tcp_sock *tp = tcp_sk(sk);
int flag = 0;
- u32 nwin = ntohs(skb->h.th->window);
+ u32 nwin = ntohs(tcp_hdr(skb)->window);
- if (likely(!skb->h.th->syn))
+ if (likely(!tcp_hdr(skb)->syn))
nwin <<= tp->rx_opt.snd_wscale;
if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
@@ -2453,7 +2587,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
* fast path is recovered for sending TCP.
*/
tp->pred_flags = 0;
- tcp_fast_path_check(sk, tp);
+ tcp_fast_path_check(sk);
if (nwin > tp->max_window) {
tp->max_window = nwin;
@@ -2467,39 +2601,139 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
return flag;
}
-static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
+/* A very conservative spurious RTO response algorithm: reduce cwnd and
+ * continue in congestion avoidance.
+ */
+static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
+{
+ tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+ tp->snd_cwnd_cnt = 0;
+ tcp_moderate_cwnd(tp);
+}
+
+/* A conservative spurious RTO response algorithm: reduce cwnd using
+ * rate halving and continue in congestion avoidance.
+ */
+static void tcp_ratehalving_spur_to_response(struct sock *sk)
+{
+ tcp_enter_cwr(sk, 0);
+}
+
+static void tcp_undo_spur_to_response(struct sock *sk, int flag)
+{
+ if (flag&FLAG_ECE)
+ tcp_ratehalving_spur_to_response(sk);
+ else
+ tcp_undo_cwr(sk, 1);
+}
+
+/* F-RTO spurious RTO detection algorithm (RFC4138)
+ *
+ * F-RTO is active during two new ACKs following RTO (well, almost, see inline
+ * comments). State (ACK number) is kept in frto_counter. When ACK advances
+ * window (but not to or beyond highest sequence sent before RTO):
+ * On First ACK, send two new segments out.
+ * On Second ACK, RTO was likely spurious. Do spurious response (response
+ * algorithm is not part of the F-RTO detection algorithm
+ * given in RFC4138 but can be selected separately).
+ * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
+ * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
+ * of Nagle; this is done using frto_counter states 2 and 3: when a new data
+ * segment of any size is sent during F-RTO, state 2 is upgraded to 3.
+ *
+ * Rationale: if the RTO was spurious, new ACKs should arrive from the
+ * original window even after we transmit two new data segments.
+ *
+ * SACK version:
+ * on first step, wait until first cumulative ACK arrives, then move to
+ * the second step. In second step, the next ACK decides.
+ *
+ * F-RTO is implemented (mainly) in four functions:
+ * - tcp_use_frto() is used to determine if TCP can use F-RTO
+ * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
+ * called when tcp_use_frto() showed green light
+ * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
+ * - tcp_enter_frto_loss() is called if there is not enough evidence
+ * to prove that the RTO is indeed spurious. It transfers the control
+ * from F-RTO to the conventional RTO recovery
+ */
+static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
tcp_sync_left_out(tp);
- if (tp->snd_una == prior_snd_una ||
- !before(tp->snd_una, tp->frto_highmark)) {
- /* RTO was caused by loss, start retransmitting in
- * go-back-N slow start
+ /* Duplicate the behavior from Loss state (fastretrans_alert) */
+ if (flag&FLAG_DATA_ACKED)
+ inet_csk(sk)->icsk_retransmits = 0;
+
+ if (!before(tp->snd_una, tp->frto_highmark)) {
+ tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
+ return 1;
+ }
+
+ if (!IsSackFrto() || IsReno(tp)) {
+ /* RFC4138 shortcoming in step 2; should also have case c):
+ * ACK isn't duplicate nor advances window, e.g., opposite dir
+ * data, winupdate
*/
- tcp_enter_frto_loss(sk);
- return;
+ if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
+ !(flag&FLAG_FORWARD_PROGRESS))
+ return 1;
+
+ if (!(flag&FLAG_DATA_ACKED)) {
+ tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
+ flag);
+ return 1;
+ }
+ } else {
+ if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+ /* Prevent sending of new data. */
+ tp->snd_cwnd = min(tp->snd_cwnd,
+ tcp_packets_in_flight(tp));
+ return 1;
+ }
+
+ if ((tp->frto_counter >= 2) &&
+ (!(flag&FLAG_FORWARD_PROGRESS) ||
+ ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
+ /* RFC4138 shortcoming (see comment above) */
+ if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
+ return 1;
+
+ tcp_enter_frto_loss(sk, 3, flag);
+ return 1;
+ }
}
if (tp->frto_counter == 1) {
- /* First ACK after RTO advances the window: allow two new
- * segments out.
- */
+ /* Sending of the next skb must be allowed or no FRTO */
+ if (!tcp_send_head(sk) ||
+ after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
+ tp->snd_una + tp->snd_wnd)) {
+ tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3),
+ flag);
+ return 1;
+ }
+
tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
+ tp->frto_counter = 2;
+ return 1;
} else {
- /* Also the second ACK after RTO advances the window.
- * The RTO was likely spurious. Reduce cwnd and continue
- * in congestion avoidance
- */
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
- tcp_moderate_cwnd(tp);
+ switch (sysctl_tcp_frto_response) {
+ case 2:
+ tcp_undo_spur_to_response(sk, flag);
+ break;
+ case 1:
+ tcp_conservative_spur_to_response(tp);
+ break;
+ default:
+ tcp_ratehalving_spur_to_response(sk);
+ break;
+ }
+ tp->frto_counter = 0;
}
-
- /* F-RTO affects on two new ACKs following RTO.
- * At latest on third ACK the TCP behavior is back to normal.
- */
- tp->frto_counter = (tp->frto_counter + 1) % 3;
+ return 0;
}
/* This routine deals with incoming acks, but not outgoing ones. */
@@ -2513,6 +2747,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
u32 prior_in_flight;
s32 seq_rtt;
int prior_packets;
+ int frto_cwnd = 0;
/* If the ack is newer than sent or older than previous acks
* then we can probably ignore it.
@@ -2549,12 +2784,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
else
NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
- flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq);
+ flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
if (TCP_SKB_CB(skb)->sacked)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
- if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
+ if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
@@ -2575,15 +2810,16 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
if (tp->frto_counter)
- tcp_process_frto(sk, prior_snd_una);
+ frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag);
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */
- if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
+ if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
+ tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
} else {
- if ((flag & FLAG_DATA_ACKED))
+ if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
}
@@ -2599,7 +2835,7 @@ no_queue:
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
*/
- if (sk->sk_send_head)
+ if (tcp_send_head(sk))
tcp_ack_probe(sk);
return 1;
@@ -2620,13 +2856,13 @@ uninteresting_ack:
void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
{
unsigned char *ptr;
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
int length=(th->doff*4)-sizeof(struct tcphdr);
ptr = (unsigned char *)(th + 1);
opt_rx->saw_tstamp = 0;
- while(length>0) {
+ while (length > 0) {
int opcode=*ptr++;
int opsize;
@@ -2642,9 +2878,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
return;
if (opsize > length)
return; /* don't parse partial options */
- switch(opcode) {
+ switch (opcode) {
case TCPOPT_MSS:
- if(opsize==TCPOLEN_MSS && th->syn && !estab) {
+ if (opsize==TCPOLEN_MSS && th->syn && !estab) {
u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
if (in_mss) {
if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
@@ -2654,12 +2890,12 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
}
break;
case TCPOPT_WINDOW:
- if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
+ if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
if (sysctl_tcp_window_scaling) {
__u8 snd_wscale = *(__u8 *) ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > 14) {
- if(net_ratelimit())
+ if (net_ratelimit())
printk(KERN_INFO "tcp_parse_options: Illegal window "
"scaling value %d >14 received.\n",
snd_wscale);
@@ -2669,7 +2905,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
}
break;
case TCPOPT_TIMESTAMP:
- if(opsize==TCPOLEN_TIMESTAMP) {
+ if (opsize==TCPOLEN_TIMESTAMP) {
if ((estab && opt_rx->tstamp_ok) ||
(!estab && sysctl_tcp_timestamps)) {
opt_rx->saw_tstamp = 1;
@@ -2679,7 +2915,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
}
break;
case TCPOPT_SACK_PERM:
- if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
+ if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
if (sysctl_tcp_sack) {
opt_rx->sack_ok = 1;
tcp_sack_reset(opt_rx);
@@ -2688,7 +2924,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
break;
case TCPOPT_SACK:
- if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+ if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
!((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
opt_rx->sack_ok) {
TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
@@ -2701,10 +2937,11 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
*/
break;
#endif
- };
+ }
+
ptr+=opsize-2;
length-=opsize;
- };
+ }
}
}
@@ -2737,7 +2974,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
static inline void tcp_store_ts_recent(struct tcp_sock *tp)
{
tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
- tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+ tp->rx_opt.ts_recent_stamp = get_seconds();
}
static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -2750,8 +2987,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
* Not only, also it occurs for expired timestamps.
*/
- if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
- xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
+ if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
+ get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
tcp_store_ts_recent(tp);
}
}
@@ -2782,7 +3019,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
u32 seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -2803,7 +3040,7 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *
{
const struct tcp_sock *tp = tcp_sk(sk);
return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
- xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
+ get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
!tcp_disordered_ack(sk, skb));
}
@@ -2910,7 +3147,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
__FUNCTION__, sk->sk_state);
break;
- };
+ }
/* It _is_ possible, that we have something out-of-order _after_ FIN.
* Probably, we should reset in this case. For now drop them.
@@ -3009,7 +3246,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
*/
tp->rx_opt.num_sacks--;
tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
- for(i=this_sack; i < tp->rx_opt.num_sacks; i++)
+ for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
sp[i] = sp[i+1];
continue;
}
@@ -3062,7 +3299,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
tp->rx_opt.num_sacks--;
sp--;
}
- for(; this_sack > 0; this_sack--, sp--)
+ for (; this_sack > 0; this_sack--, sp--)
*sp = *(sp-1);
new_sack:
@@ -3088,7 +3325,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
return;
}
- for(this_sack = 0; this_sack < num_sacks; ) {
+ for (this_sack = 0; this_sack < num_sacks; ) {
/* Check if the start of the sack is covered by RCV.NXT. */
if (!before(tp->rcv_nxt, sp->start_seq)) {
int i;
@@ -3144,8 +3381,8 @@ static void tcp_ofo_queue(struct sock *sk)
__skb_unlink(skb, &tp->out_of_order_queue);
__skb_queue_tail(&sk->sk_receive_queue, skb);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- if(skb->h.th->fin)
- tcp_fin(skb, sk, skb->h.th);
+ if (tcp_hdr(skb)->fin)
+ tcp_fin(skb, sk, tcp_hdr(skb));
}
}
@@ -3153,7 +3390,7 @@ static int tcp_prune_queue(struct sock *sk);
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
struct tcp_sock *tp = tcp_sk(sk);
int eaten = -1;
@@ -3210,9 +3447,9 @@ queue_and_out:
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- if(skb->len)
- tcp_event_data_recv(sk, tp, skb);
- if(th->fin)
+ if (skb->len)
+ tcp_event_data_recv(sk, skb);
+ if (th->fin)
tcp_fin(skb, sk, th);
if (!skb_queue_empty(&tp->out_of_order_queue)) {
@@ -3228,7 +3465,7 @@ queue_and_out:
if (tp->rx_opt.num_sacks)
tcp_sack_remove(tp);
- tcp_fast_path_check(sk, tp);
+ tcp_fast_path_check(sk);
if (eaten > 0)
__kfree_skb(skb);
@@ -3392,7 +3629,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
* - bloated or contains data before "start" or
* overlaps to the next one.
*/
- if (!skb->h.th->syn && !skb->h.th->fin &&
+ if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
(tcp_win_from_space(skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start) ||
(skb->next != tail &&
@@ -3403,7 +3640,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
start = TCP_SKB_CB(skb)->end_seq;
skb = skb->next;
}
- if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+ if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
return;
while (before(start, end)) {
@@ -3419,11 +3656,14 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
nskb = alloc_skb(copy+header, GFP_ATOMIC);
if (!nskb)
return;
+
+ skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
+ skb_set_network_header(nskb, (skb_network_header(skb) -
+ skb->head));
+ skb_set_transport_header(nskb, (skb_transport_header(skb) -
+ skb->head));
skb_reserve(nskb, header);
memcpy(nskb->head, skb->head, header);
- nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
- nskb->h.raw = nskb->head + (skb->h.raw-skb->head);
- nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
__skb_insert(nskb, skb->prev, skb, list);
@@ -3449,7 +3689,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
__kfree_skb(skb);
NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
skb = next;
- if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+ if (skb == tail ||
+ tcp_hdr(skb)->syn ||
+ tcp_hdr(skb)->fin)
return;
}
}
@@ -3514,7 +3756,7 @@ static int tcp_prune_queue(struct sock *sk)
NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- tcp_clamp_window(sk, tp);
+ tcp_clamp_window(sk);
else if (tcp_memory_pressure)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
@@ -3583,8 +3825,10 @@ void tcp_cwnd_application_limited(struct sock *sk)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
+static int tcp_should_expand_sndbuf(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
/* If the user specified a specific send buffer setting, do
* not modify it.
*/
@@ -3616,7 +3860,7 @@ static void tcp_new_space(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_should_expand_sndbuf(sk, tp)) {
+ if (tcp_should_expand_sndbuf(sk)) {
int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
demanded = max_t(unsigned int, tp->snd_cwnd,
@@ -3640,9 +3884,9 @@ static void tcp_check_space(struct sock *sk)
}
}
-static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_data_snd_check(struct sock *sk)
{
- tcp_push_pending_frames(sk, tp);
+ tcp_push_pending_frames(sk);
tcp_check_space(sk);
}
@@ -3790,7 +4034,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
int err;
local_bh_enable();
- if (skb->ip_summed==CHECKSUM_UNNECESSARY)
+ if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
else
err = skb_copy_and_csum_datagram_iovec(skb, hlen,
@@ -3822,7 +4066,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
{
- return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+ return !skb_csum_unnecessary(skb) &&
__tcp_checksum_complete_user(sk, skb);
}
@@ -3840,7 +4084,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
tp->ucopy.dma_chan = get_softnet_dma();
- if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
@@ -3856,7 +4100,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
tcp_rcv_space_adjust(sk);
if ((tp->ucopy.len == 0) ||
- (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) ||
+ (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
(atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
tp->ucopy.wakeup = 1;
sk->sk_data_ready(sk, 0);
@@ -3976,7 +4220,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
*/
tcp_ack(sk, skb, 0);
__kfree_skb(skb);
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
return 0;
} else { /* Header too small */
TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -4047,12 +4291,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
}
- tcp_event_data_recv(sk, tp, skb);
+ tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
/* Well, only one small jumplet in fast path... */
tcp_ack(sk, skb, FLAG_DATA);
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
if (!inet_csk_ack_scheduled(sk))
goto no_ack;
}
@@ -4109,7 +4353,7 @@ slow_path:
goto discard;
}
- if(th->rst) {
+ if (th->rst) {
tcp_reset(sk);
goto discard;
}
@@ -4124,7 +4368,7 @@ slow_path:
}
step5:
- if(th->ack)
+ if (th->ack)
tcp_ack(sk, skb, FLAG_SLOWPATH);
tcp_rcv_rtt_measure_ts(sk, skb);
@@ -4135,7 +4379,7 @@ step5:
/* step 7: process the segment text */
tcp_data_queue(sk, skb);
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
return 0;
@@ -4412,13 +4656,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
case TCP_LISTEN:
- if(th->ack)
+ if (th->ack)
return 1;
- if(th->rst)
+ if (th->rst)
goto discard;
- if(th->syn) {
+ if (th->syn) {
if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
return 1;
@@ -4452,7 +4696,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* Do step6 onward by hand. */
tcp_urg(sk, skb, th);
__kfree_skb(skb);
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
return 0;
}
@@ -4474,7 +4718,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
/* step 2: check RST bit */
- if(th->rst) {
+ if (th->rst) {
tcp_reset(sk);
goto discard;
}
@@ -4497,7 +4741,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (th->ack) {
int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
- switch(sk->sk_state) {
+ switch (sk->sk_state) {
case TCP_SYN_RECV:
if (acceptable) {
tp->copied_seq = tp->rcv_nxt;
@@ -4644,7 +4888,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* tcp_data could move socket to TIME-WAIT */
if (sk->sk_state != TCP_CLOSE) {
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ba74bbe7d30..5a3e7f839fc5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,7 +88,7 @@ int sysctl_tcp_low_latency __read_mostly;
#define ICMP_MIN_LENGTH 8
/* Socket used for sending RSTs */
-static struct socket *tcp_socket;
+static struct socket *tcp_socket __read_mostly;
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
@@ -125,10 +125,10 @@ void tcp_unhash(struct sock *sk)
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
- return secure_tcp_sequence_number(skb->nh.iph->daddr,
- skb->nh.iph->saddr,
- skb->h.th->dest,
- skb->h.th->source);
+ return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -149,7 +149,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
*/
if (tcptw->tw_ts_recent_stamp &&
(twp == NULL || (sysctl_tcp_tw_reuse &&
- xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
+ get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0)
tp->write_seq = 1;
@@ -224,7 +224,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* when trying new connection.
*/
if (peer != NULL &&
- peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
+ peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
tp->rx_opt.ts_recent = peer->tcp_ts;
}
@@ -354,8 +354,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
struct tcp_sock *tp;
struct inet_sock *inet;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct sock *sk;
__u32 seq;
int err;
@@ -499,11 +499,12 @@ out:
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
struct inet_sock *inet = inet_sk(sk);
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
th->check = ~tcp_v4_check(len, inet->saddr,
inet->daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
} else {
th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
@@ -515,17 +516,18 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
struct tcphdr *th;
if (!pskb_may_pull(skb, sizeof(*th)))
return -EINVAL;
- iph = skb->nh.iph;
- th = skb->h.th;
+ iph = ip_hdr(skb);
+ th = tcp_hdr(skb);
th->check = 0;
th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
@@ -546,7 +548,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
@@ -585,7 +587,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.iov[0].iov_len = sizeof(rep.th);
#ifdef CONFIG_TCP_MD5SIG
- key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
+ key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
if (key) {
rep.opt[0] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
@@ -597,14 +599,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
key,
- skb->nh.iph->daddr,
- skb->nh.iph->saddr,
+ ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
&rep.th, IPPROTO_TCP,
arg.iov[0].iov_len);
}
#endif
- arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
- skb->nh.iph->saddr, /* XXX */
+ arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr, /* XXX */
sizeof(struct tcphdr), IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
@@ -622,7 +624,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 ts)
{
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
@@ -670,7 +672,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
* skb->sk) holds true, but we program defensively.
*/
if (!twsk && skb->sk) {
- key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
+ key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
} else if (twsk && twsk->tw_md5_keylen) {
tw_key.key = twsk->tw_md5_key;
tw_key.keylen = twsk->tw_md5_keylen;
@@ -690,14 +692,14 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
key,
- skb->nh.iph->daddr,
- skb->nh.iph->saddr,
+ ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
&rep.th, IPPROTO_TCP,
arg.iov[0].iov_len);
}
#endif
- arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
- skb->nh.iph->saddr, /* XXX */
+ arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
@@ -745,7 +747,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
skb = tcp_make_synack(sk, dst, req);
if (skb) {
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
th->check = tcp_v4_check(skb->len,
ireq->loc_addr,
@@ -781,7 +783,7 @@ static void syn_flood_warning(struct sk_buff *skb)
warntime = jiffies;
printk(KERN_INFO
"possible SYN flooding on port %d. Sending cookies.\n",
- ntohs(skb->h.th->dest));
+ ntohs(tcp_hdr(skb)->dest));
}
}
#endif
@@ -1133,8 +1135,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
*/
__u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th = skb->h.th;
+ const struct iphdr *iph = ip_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
int length = (th->doff << 2) - sizeof(struct tcphdr);
int genhash;
unsigned char *ptr;
@@ -1251,8 +1253,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
struct inet_request_sock *ireq;
struct tcp_options_received tmp_opt;
struct request_sock *req;
- __be32 saddr = skb->nh.iph->saddr;
- __be32 daddr = skb->nh.iph->daddr;
+ __be32 saddr = ip_hdr(skb)->saddr;
+ __be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
@@ -1327,7 +1329,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->rmt_addr = saddr;
ireq->opt = tcp_v4_save_options(sk, skb);
if (!want_cookie)
- TCP_ECN_create_request(req, skb->h.th);
+ TCP_ECN_create_request(req, tcp_hdr(skb));
if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
@@ -1351,7 +1353,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
(dst = inet_csk_route_req(sk, req)) != NULL &&
(peer = rt_get_peer((struct rtable *)dst)) != NULL &&
peer->v4daddr == saddr) {
- if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
+ if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
(s32)(peer->tcp_ts - req->ts_recent) >
TCP_PAWS_WINDOW) {
NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
@@ -1375,7 +1377,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
"request from %u.%u.%u.%u/%u\n",
NIPQUAD(saddr),
- ntohs(skb->h.th->source));
+ ntohs(tcp_hdr(skb)->source));
dst_release(dst);
goto drop_and_free;
}
@@ -1439,7 +1441,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->opt = ireq->opt;
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
- newinet->mc_ttl = skb->nh.iph->ttl;
+ newinet->mc_ttl = ip_hdr(skb)->ttl;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newinet->opt)
inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
@@ -1481,8 +1483,8 @@ exit:
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = skb->h.th;
- struct iphdr *iph = skb->nh.iph;
+ struct tcphdr *th = tcp_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
struct sock *nsk;
struct request_sock **prev;
/* Find possible connection requests. */
@@ -1491,9 +1493,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
- th->source, skb->nh.iph->daddr,
- th->dest, inet_iif(skb));
+ nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
+ iph->daddr, th->dest, inet_iif(skb));
if (nsk) {
if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1513,15 +1514,17 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
+ const struct iphdr *iph = ip_hdr(skb);
+
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v4_check(skb->len, skb->nh.iph->saddr,
- skb->nh.iph->daddr, skb->csum)) {
+ if (!tcp_v4_check(skb->len, iph->saddr,
+ iph->daddr, skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
return 0;
}
}
- skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
+ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
skb->len, IPPROTO_TCP, 0);
if (skb->len <= 76) {
@@ -1555,7 +1558,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
+ if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
}
@@ -1563,7 +1566,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
+ if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
@@ -1581,7 +1584,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
}
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
+ if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
}
@@ -1610,6 +1613,7 @@ csum_err:
int tcp_v4_rcv(struct sk_buff *skb)
{
+ const struct iphdr *iph;
struct tcphdr *th;
struct sock *sk;
int ret;
@@ -1623,7 +1627,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
- th = skb->h.th;
+ th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr) / 4)
goto bad_packet;
@@ -1634,23 +1638,21 @@ int tcp_v4_rcv(struct sk_buff *skb)
* Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is eliminated.
* So, we defer the checks. */
- if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
- tcp_v4_checksum_init(skb)))
+ if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
goto bad_packet;
- th = skb->h.th;
+ th = tcp_hdr(skb);
+ iph = ip_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
- TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
+ TCP_SKB_CB(skb)->flags = iph->tos;
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
- skb->nh.iph->daddr, th->dest,
- inet_iif(skb));
-
+ sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
+ iph->daddr, th->dest, inet_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1724,8 +1726,7 @@ do_time_wait:
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
- skb->nh.iph->daddr,
- th->dest,
+ iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
@@ -1770,7 +1771,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
if (peer) {
if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
- (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+ (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
peer->tcp_ts = tp->rx_opt.ts_recent;
@@ -1791,7 +1792,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
- (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+ (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
peer->tcp_ts = tcptw->tw_ts_recent;
@@ -1890,7 +1891,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
tcp_cleanup_congestion_control(sk);
/* Cleanup up the write buffer. */
- sk_stream_writequeue_purge(sk);
+ tcp_write_queue_purge(sk);
/* Cleans up our, hopefully empty, out_of_order_queue. */
__skb_queue_purge(&tp->out_of_order_queue);
@@ -2293,13 +2294,13 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,
req);
}
-static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
+static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
{
int timer_active;
unsigned long timer_expires;
- struct tcp_sock *tp = tcp_sk(sp);
- const struct inet_connection_sock *icsk = inet_csk(sp);
- struct inet_sock *inet = inet_sk(sp);
+ struct tcp_sock *tp = tcp_sk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_sock *inet = inet_sk(sk);
__be32 dest = inet->daddr;
__be32 src = inet->rcv_saddr;
__u16 destp = ntohs(inet->dport);
@@ -2311,9 +2312,9 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
timer_expires = icsk->icsk_timeout;
- } else if (timer_pending(&sp->sk_timer)) {
+ } else if (timer_pending(&sk->sk_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = sk->sk_timer.expires;
} else {
timer_active = 0;
timer_expires = jiffies;
@@ -2321,17 +2322,17 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5d %8d %lu %d %p %u %u %u %u %d",
- i, src, srcp, dest, destp, sp->sk_state,
+ i, src, srcp, dest, destp, sk->sk_state,
tp->write_seq - tp->snd_una,
- sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
+ sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
(tp->rcv_nxt - tp->copied_seq),
timer_active,
jiffies_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
- sock_i_uid(sp),
+ sock_i_uid(sk),
icsk->icsk_probes_out,
- sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp,
+ sock_i_ino(sk),
+ atomic_read(&sk->sk_refcnt), sk,
icsk->icsk_rto,
icsk->icsk_ack.ato,
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index f0ebaf0e21cb..43294ad9f63e 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
* 3. calc smoothed OWD (SOWD).
* Most ideas come from the original TCP-LP implementation.
*/
-static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
+static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
{
struct lp *lp = inet_csk_ca(sk);
s64 mowd = tcp_lp_owd_calculator(sk);
@@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
* newReno in increase case.
* We work it out by following the idea from TCP-LP's paper directly
*/
-static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
+static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
+ tcp_lp_rtt_sample(sk, ktime_to_us(net_timedelta(last)));
+
/* calc inference */
if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
@@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
}
static struct tcp_congestion_ops tcp_lp = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_lp_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_lp_cong_avoid,
.min_cwnd = tcp_reno_min_cwnd,
- .rtt_sample = tcp_lp_rtt_sample,
.pkts_acked = tcp_lp_pkts_acked,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6b5c64f3c925..a12b08fca5ad 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -149,7 +149,7 @@ kill_with_rst:
tw->tw_substate = TCP_TIME_WAIT;
tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (tmp_opt.saw_tstamp) {
- tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+ tcptw->tw_ts_recent_stamp = get_seconds();
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
}
@@ -208,7 +208,7 @@ kill:
if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
- tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+ tcptw->tw_ts_recent_stamp = get_seconds();
}
inet_twsk_put(tw);
@@ -246,7 +246,7 @@ kill:
if (paws_reject)
NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
- if(!th->rst) {
+ if (!th->rst) {
/* In this case we must reset the TIMEWAIT timer.
*
* If it is ACKless SYN it may be both old duplicate
@@ -324,7 +324,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (tcp_alloc_md5sig_pool() == NULL)
BUG();
}
- } while(0);
+ } while (0);
#endif
/* Linkage updates. */
@@ -387,8 +387,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
/* Now setup tcp_sock */
newtp = tcp_sk(newsk);
newtp->pred_flags = 0;
- newtp->rcv_nxt = treq->rcv_isn + 1;
- newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;
+ newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
+ newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
tcp_prequeue_init(newtp);
@@ -422,10 +422,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
skb_queue_head_init(&newtp->out_of_order_queue);
- newtp->rcv_wup = treq->rcv_isn + 1;
newtp->write_seq = treq->snt_isn + 1;
newtp->pushed_seq = newtp->write_seq;
- newtp->copied_seq = treq->rcv_isn + 1;
newtp->rx_opt.saw_tstamp = 0;
@@ -440,7 +438,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
keepalive_time_when(newtp));
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
- if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
+ if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
if (sysctl_tcp_fack)
newtp->rx_opt.sack_ok |= 2;
}
@@ -455,12 +453,13 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
newtp->window_clamp = min(newtp->window_clamp, 65535U);
}
- newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
+ newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
+ newtp->rx_opt.snd_wscale);
newtp->max_window = newtp->snd_wnd;
if (newtp->rx_opt.tstamp_ok) {
newtp->rx_opt.ts_recent = req->ts_recent;
- newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+ newtp->rx_opt.ts_recent_stamp = get_seconds();
newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
} else {
newtp->rx_opt.ts_recent_stamp = 0;
@@ -490,7 +489,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev)
{
- struct tcphdr *th = skb->h.th;
+ const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
int paws_reject = 0;
struct tcp_options_received tmp_opt;
@@ -506,7 +505,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
* it can be estimated (approximately)
* from another data.
*/
- tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+ tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
paws_reject = tcp_paws_check(&tmp_opt, th->rst);
}
}
@@ -712,8 +711,8 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int state = child->sk_state;
if (!sock_owned_by_user(child)) {
- ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);
-
+ ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
+ skb->len);
/* Wakeup parent, send SIGIO */
if (state == TCP_SYN_RECV && child->sk_state != state)
parent->sk_data_ready(parent, 0);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dc151139b5af..53232dd6fb48 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -40,7 +40,6 @@
#include <linux/compiler.h>
#include <linux/module.h>
-#include <linux/smp_lock.h>
/* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;
@@ -62,14 +61,13 @@ int sysctl_tcp_base_mss __read_mostly = 512;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
-static void update_send_head(struct sock *sk, struct tcp_sock *tp,
- struct sk_buff *skb)
+static void update_send_head(struct sock *sk, struct sk_buff *skb)
{
- sk->sk_send_head = skb->next;
- if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
- sk->sk_send_head = NULL;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- tcp_packets_out_inc(sk, tp, skb);
+ tcp_packets_out_inc(sk, skb);
}
/* SND.NXT, if window was not shrunk.
@@ -78,8 +76,10 @@ static void update_send_head(struct sock *sk, struct tcp_sock *tp,
* Anything in between SND.UNA...SND.UNA+SND.WND also can be already
* invalid. OK, let's make this for now:
*/
-static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp)
+static inline __u32 tcp_acceptable_seq(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
return tp->snd_nxt;
else
@@ -238,7 +238,7 @@ static u16 tcp_select_window(struct sock *sk)
u32 new_win = __tcp_select_window(sk);
/* Never shrink the offered window */
- if(new_win < cur_win) {
+ if (new_win < cur_win) {
/* Danger Will Robinson!
* Don't update rcv_wup/rcv_wnd here or else
* we will not be able to advertise a zero
@@ -289,10 +289,12 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
(TCPOPT_SACK << 8) |
(TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
TCPOLEN_SACK_PERBLOCK)));
- for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+
+ for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
*ptr++ = htonl(sp[this_sack].start_seq);
*ptr++ = htonl(sp[this_sack].end_seq);
}
+
if (tp->rx_opt.dsack) {
tp->rx_opt.dsack = 0;
tp->rx_opt.eff_sacks--;
@@ -337,7 +339,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
*/
*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
if (ts) {
- if(sack)
+ if (sack)
*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
(TCPOLEN_SACK_PERM << 16) |
(TCPOPT_TIMESTAMP << 8) |
@@ -349,7 +351,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
TCPOLEN_TIMESTAMP);
*ptr++ = htonl(tstamp); /* TSVAL */
*ptr++ = htonl(ts_recent); /* TSECR */
- } else if(sack)
+ } else if (sack)
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_SACK_PERM << 8) |
@@ -406,7 +408,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
/* If congestion control is doing timestamping, we must
* take such a timestamp before we potentially clone/copy.
*/
- if (icsk->icsk_ca_ops->rtt_sample)
+ if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
__net_timestamp(skb);
if (likely(clone_it)) {
@@ -430,7 +432,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
sysctl_flags = 0;
if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
- if(sysctl_tcp_timestamps) {
+ if (sysctl_tcp_timestamps) {
tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
}
@@ -465,11 +467,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
#endif
- th = (struct tcphdr *) skb_push(skb, tcp_header_size);
- skb->h.th = th;
+ skb_push(skb, tcp_header_size);
+ skb_reset_transport_header(skb);
skb_set_owner_w(skb, sk);
/* Build TCP header and checksum it. */
+ th = tcp_hdr(skb);
th->source = inet->sport;
th->dest = inet->dport;
th->seq = htonl(tcb->seq);
@@ -515,7 +518,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
md5 ? &md5_hash_location :
#endif
NULL);
- TCP_ECN_send(sk, tp, skb, tcp_header_size);
+ TCP_ECN_send(sk, skb, tcp_header_size);
}
#ifdef CONFIG_TCP_MD5SIG
@@ -524,7 +527,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tp->af_specific->calc_md5_hash(md5_hash_location,
md5,
sk, NULL, NULL,
- skb->h.th,
+ tcp_hdr(skb),
sk->sk_protocol,
skb->len);
}
@@ -545,7 +548,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (likely(err <= 0))
return err;
- tcp_enter_cwr(sk);
+ tcp_enter_cwr(sk, 1);
return net_xmit_eval(err);
@@ -567,12 +570,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
/* Advance write_seq and place onto the write_queue. */
tp->write_seq = TCP_SKB_CB(skb)->end_seq;
skb_header_release(skb);
- __skb_queue_tail(&sk->sk_write_queue, skb);
+ tcp_add_write_queue_tail(sk, skb);
sk_charge_skb(sk, skb);
-
- /* Queue it, remembering where we must start sending. */
- if (sk->sk_send_head == NULL)
- sk->sk_send_head = skb;
}
static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
@@ -705,7 +704,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
/* Link BUFF into the send queue. */
skb_header_release(buff);
- __skb_append(skb, buff, &sk->sk_write_queue);
+ tcp_insert_write_queue_after(skb, buff, sk);
return 0;
}
@@ -736,7 +735,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
}
skb_shinfo(skb)->nr_frags = k;
- skb->tail = skb->data;
+ skb_reset_tail_pointer(skb);
skb->data_len -= len;
skb->len = skb->data_len;
}
@@ -930,8 +929,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
/* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+static void tcp_cwnd_validate(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
__u32 packets_out = tp->packets_out;
if (packets_out >= tp->snd_cwnd) {
@@ -943,7 +943,8 @@ static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
+ if (sysctl_tcp_slow_start_after_idle &&
+ (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
tcp_cwnd_application_limited(sk);
}
}
@@ -1033,8 +1034,10 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
if (nonagle & TCP_NAGLE_PUSH)
return 1;
- /* Don't use the nagle rule for urgent data (or for the final FIN). */
- if (tp->urg_mode ||
+ /* Don't use the nagle rule for urgent data (or for the final FIN).
+ * Nagle can be ignored during F-RTO too (see RFC4138).
+ */
+ if (tp->urg_mode || (tp->frto_counter == 2) ||
(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
return 1;
@@ -1055,7 +1058,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns
return !after(end_seq, tp->snd_una + tp->snd_wnd);
}
-/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
+/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
* should be put on the wire right now. If so, it returns the number of
* packets allowed by the congestion window.
*/
@@ -1078,15 +1081,10 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
return cwnd_quota;
}
-static inline int tcp_skb_is_last(const struct sock *sk,
- const struct sk_buff *skb)
+int tcp_may_send_now(struct sock *sk)
{
- return skb->next == (struct sk_buff *)&sk->sk_write_queue;
-}
-
-int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
-{
- struct sk_buff *skb = sk->sk_send_head;
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb = tcp_send_head(sk);
return (skb &&
tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
@@ -1142,7 +1140,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* Link BUFF into the send queue. */
skb_header_release(buff);
- __skb_append(skb, buff, &sk->sk_write_queue);
+ tcp_insert_write_queue_after(skb, buff, sk);
return 0;
}
@@ -1152,8 +1150,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
*
* This algorithm is from John Heffner.
*/
-static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 send_win, cong_win, limit, in_flight;
@@ -1248,10 +1247,10 @@ static int tcp_mtu_probe(struct sock *sk)
/* Have enough data in the send queue to probe? */
len = 0;
- if ((skb = sk->sk_send_head) == NULL)
+ if ((skb = tcp_send_head(sk)) == NULL)
return -1;
while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb))
- skb = skb->next;
+ skb = tcp_write_queue_next(sk, skb);
if (len < probe_size)
return -1;
@@ -1278,9 +1277,9 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
sk_charge_skb(sk, nskb);
- skb = sk->sk_send_head;
- __skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue);
- sk->sk_send_head = nskb;
+ skb = tcp_send_head(sk);
+ tcp_insert_write_queue_before(nskb, skb, sk);
+ tcp_advance_send_head(sk, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1291,7 +1290,7 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
while (len < probe_size) {
- next = skb->next;
+ next = tcp_write_queue_next(sk, skb);
copy = min_t(int, skb->len, probe_size - len);
if (nskb->ip_summed)
@@ -1304,7 +1303,7 @@ static int tcp_mtu_probe(struct sock *sk)
/* We've eaten all the data from this skb.
* Throw it away. */
TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
- __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(skb, sk);
sk_stream_free_skb(sk, skb);
} else {
TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
@@ -1332,7 +1331,7 @@ static int tcp_mtu_probe(struct sock *sk)
/* Decrement cwnd here because we are sending
* effectively two packets. */
tp->snd_cwnd--;
- update_send_head(sk, tp, nskb);
+ update_send_head(sk, nskb);
icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1376,7 +1375,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
sent_pkts = 1;
}
- while ((skb = sk->sk_send_head)) {
+ while ((skb = tcp_send_head(sk))) {
unsigned int limit;
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
@@ -1395,7 +1394,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
nonagle : TCP_NAGLE_PUSH))))
break;
} else {
- if (tcp_tso_should_defer(sk, tp, skb))
+ if (tcp_tso_should_defer(sk, skb))
break;
}
@@ -1424,31 +1423,31 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
/* Advance the send_head. This one is sent out.
* This call will increment packets_out.
*/
- update_send_head(sk, tp, skb);
+ update_send_head(sk, skb);
tcp_minshall_update(tp, mss_now, skb);
sent_pkts++;
}
if (likely(sent_pkts)) {
- tcp_cwnd_validate(sk, tp);
+ tcp_cwnd_validate(sk);
return 0;
}
- return !tp->packets_out && sk->sk_send_head;
+ return !tp->packets_out && tcp_send_head(sk);
}
/* Push out any pending frames which were held back due to
* TCP_CORK or attempt at coalescing tiny packets.
* The socket must be locked by the caller.
*/
-void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
- unsigned int cur_mss, int nonagle)
+void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+ int nonagle)
{
- struct sk_buff *skb = sk->sk_send_head;
+ struct sk_buff *skb = tcp_send_head(sk);
if (skb) {
if (tcp_write_xmit(sk, cur_mss, nonagle))
- tcp_check_probe_timer(sk, tp);
+ tcp_check_probe_timer(sk);
}
}
@@ -1458,7 +1457,7 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
void tcp_push_one(struct sock *sk, unsigned int mss_now)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = sk->sk_send_head;
+ struct sk_buff *skb = tcp_send_head(sk);
unsigned int tso_segs, cwnd_quota;
BUG_ON(!skb || skb->len < mss_now);
@@ -1492,8 +1491,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
- update_send_head(sk, tp, skb);
- tcp_cwnd_validate(sk, tp);
+ update_send_head(sk, skb);
+ tcp_cwnd_validate(sk);
return;
}
}
@@ -1607,6 +1606,9 @@ u32 __tcp_select_window(struct sock *sk)
*/
if (window <= free_space - mss || window > free_space)
window = (free_space/mss)*mss;
+ else if (mss == full_space &&
+ free_space > window + full_space/2)
+ window = free_space;
}
return window;
@@ -1616,7 +1618,7 @@ u32 __tcp_select_window(struct sock *sk)
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *next_skb = skb->next;
+ struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
/* The first test we must make is that neither of these two
* SKB's are still referenced by someone else.
@@ -1626,7 +1628,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
u16 flags = TCP_SKB_CB(skb)->flags;
/* Also punt if next skb has been SACK'd. */
- if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
+ if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
return;
/* Next skb is out of window. */
@@ -1648,9 +1650,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
clear_all_retrans_hints(tp);
/* Ok. We will be able to collapse the packet. */
- __skb_unlink(next_skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(next_skb, sk);
- memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
+ skb_copy_from_linear_data(next_skb,
+ skb_put(skb, next_skb_size),
+ next_skb_size);
if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1702,7 +1706,9 @@ void tcp_simple_retransmit(struct sock *sk)
unsigned int mss = tcp_current_mss(sk, 0);
int lost = 0;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
if (skb->len > mss &&
!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
@@ -1784,13 +1790,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
}
/* Collapse two adjacent packets if worthwhile and we can. */
- if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
- (skb->len < (cur_mss >> 1)) &&
- (skb->next != sk->sk_send_head) &&
- (skb->next != (struct sk_buff *)&sk->sk_write_queue) &&
- (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
- (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) &&
- (sysctl_tcp_retrans_collapse != 0))
+ if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
+ (skb->len < (cur_mss >> 1)) &&
+ (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
+ (!tcp_skb_is_last(sk, skb)) &&
+ (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+ (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
+ (sysctl_tcp_retrans_collapse != 0))
tcp_retrans_try_collapse(sk, skb, cur_mss);
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
@@ -1800,9 +1806,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
* retransmit when old data is attached. So strip it off
* since it is cheap to do so and saves bytes on the network.
*/
- if(skb->len > 0 &&
- (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
- tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
+ if (skb->len > 0 &&
+ (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
+ tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
if (!pskb_trim(skb, 0)) {
TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
skb_shinfo(skb)->gso_segs = 1;
@@ -1868,15 +1874,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
skb = tp->retransmit_skb_hint;
packet_cnt = tp->retransmit_cnt_hint;
}else{
- skb = sk->sk_write_queue.next;
+ skb = tcp_write_queue_head(sk);
packet_cnt = 0;
}
/* First pass: retransmit lost packets. */
if (tp->lost_out) {
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
__u8 sacked = TCP_SKB_CB(skb)->sacked;
+ if (skb == tcp_send_head(sk))
+ break;
/* we could do better than to assign each time */
tp->retransmit_skb_hint = skb;
tp->retransmit_cnt_hint = packet_cnt;
@@ -1902,8 +1910,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
else
NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
- if (skb ==
- skb_peek(&sk->sk_write_queue))
+ if (skb == tcp_write_queue_head(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
TCP_RTO_MAX);
@@ -1933,18 +1940,20 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
* segments to send.
*/
- if (tcp_may_send_now(sk, tp))
+ if (tcp_may_send_now(sk))
return;
if (tp->forward_skb_hint) {
skb = tp->forward_skb_hint;
packet_cnt = tp->forward_cnt_hint;
} else{
- skb = sk->sk_write_queue.next;
+ skb = tcp_write_queue_head(sk);
packet_cnt = 0;
}
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
tp->forward_cnt_hint = packet_cnt;
tp->forward_skb_hint = skb;
@@ -1969,7 +1978,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
break;
}
- if (skb == skb_peek(&sk->sk_write_queue))
+ if (skb == tcp_write_queue_head(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
TCP_RTO_MAX);
@@ -1985,7 +1994,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
void tcp_send_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
int mss_now;
/* Optimization, tack on the FIN if we have a queue of
@@ -1994,7 +2003,7 @@ void tcp_send_fin(struct sock *sk)
*/
mss_now = tcp_current_mss(sk, 1);
- if (sk->sk_send_head != NULL) {
+ if (tcp_send_head(sk) != NULL) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
TCP_SKB_CB(skb)->end_seq++;
tp->write_seq++;
@@ -2021,17 +2030,16 @@ void tcp_send_fin(struct sock *sk)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
tcp_queue_skb(sk, skb);
}
- __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);
+ __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
}
/* We get here when a process closes a file descriptor (either due to
* an explicit close() or as a byproduct of exit()'ing) and there
* was unread data in the receive queue. This behavior is recommended
- * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM
+ * by RFC 2525, section 2.17. -DaveM
*/
void tcp_send_active_reset(struct sock *sk, gfp_t priority)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
/* NOTE: No TCP options attached and we never retransmit this. */
@@ -2051,7 +2059,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
skb_shinfo(skb)->gso_type = 0;
/* Send it off. */
- TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
+ TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2067,7 +2075,7 @@ int tcp_send_synack(struct sock *sk)
{
struct sk_buff* skb;
- skb = skb_peek(&sk->sk_write_queue);
+ skb = tcp_write_queue_head(sk);
if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
return -EFAULT;
@@ -2077,9 +2085,9 @@ int tcp_send_synack(struct sock *sk)
struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
if (nskb == NULL)
return -ENOMEM;
- __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(skb, sk);
skb_header_release(nskb);
- __skb_queue_head(&sk->sk_write_queue, nskb);
+ __tcp_add_write_queue_head(sk, nskb);
sk_stream_free_skb(sk, skb);
sk_charge_skb(sk, nskb);
skb = nskb;
@@ -2129,8 +2137,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
if (md5)
tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
#endif
- skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+ skb_push(skb, tcp_header_size);
+ skb_reset_transport_header(skb);
+ th = tcp_hdr(skb);
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
@@ -2184,7 +2194,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
tp->af_specific->calc_md5_hash(md5_hash_location,
md5,
NULL, dst, req,
- skb->h.th, sk->sk_protocol,
+ tcp_hdr(skb), sk->sk_protocol,
skb->len);
}
#endif
@@ -2267,7 +2277,7 @@ int tcp_connect(struct sock *sk)
skb_reserve(buff, MAX_TCP_HEADER);
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
- TCP_ECN_send_syn(sk, tp, buff);
+ TCP_ECN_send_syn(sk, buff);
TCP_SKB_CB(buff)->sacked = 0;
skb_shinfo(buff)->gso_segs = 1;
skb_shinfo(buff)->gso_size = 0;
@@ -2281,7 +2291,7 @@ int tcp_connect(struct sock *sk)
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
skb_header_release(buff);
- __skb_queue_tail(&sk->sk_write_queue, buff);
+ __tcp_add_write_queue_tail(sk, buff);
sk_charge_skb(sk, buff);
tp->packets_out += tcp_skb_pcount(buff);
tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
@@ -2359,7 +2369,6 @@ void tcp_send_ack(struct sock *sk)
{
/* If we have been reset, we may not send again. */
if (sk->sk_state != TCP_CLOSE) {
- struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
/* We are not putting this on the write queue, so
@@ -2385,7 +2394,7 @@ void tcp_send_ack(struct sock *sk)
skb_shinfo(buff)->gso_type = 0;
/* Send it off, this clears delayed acks for us. */
- TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
+ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
}
@@ -2437,7 +2446,7 @@ int tcp_write_wakeup(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- if ((skb = sk->sk_send_head) != NULL &&
+ if ((skb = tcp_send_head(sk)) != NULL &&
before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
int err;
unsigned int mss = tcp_current_mss(sk, 0);
@@ -2463,7 +2472,7 @@ int tcp_write_wakeup(struct sock *sk)
TCP_SKB_CB(skb)->when = tcp_time_stamp;
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (!err) {
- update_send_head(sk, tp, skb);
+ update_send_head(sk, skb);
}
return err;
} else {
@@ -2487,7 +2496,7 @@ void tcp_send_probe0(struct sock *sk)
err = tcp_write_wakeup(sk);
- if (tp->packets_out || !sk->sk_send_head) {
+ if (tp->packets_out || !tcp_send_head(sk)) {
/* Cancel probe timer, if it is not required. */
icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 61f406f27294..3938d5dbdf20 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -26,6 +26,8 @@
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/kfifo.h>
+#include <linux/ktime.h>
+#include <linux/time.h>
#include <linux/vmalloc.h>
#include <net/tcp.h>
@@ -34,43 +36,45 @@ MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
MODULE_DESCRIPTION("TCP cwnd snooper");
MODULE_LICENSE("GPL");
-static int port = 0;
+static int port __read_mostly = 0;
MODULE_PARM_DESC(port, "Port to match (0=all)");
module_param(port, int, 0);
-static int bufsize = 64*1024;
+static int bufsize __read_mostly = 64*1024;
MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
module_param(bufsize, int, 0);
+static int full __read_mostly;
+MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
+module_param(full, int, 0);
+
static const char procname[] = "tcpprobe";
struct {
- struct kfifo *fifo;
- spinlock_t lock;
+ struct kfifo *fifo;
+ spinlock_t lock;
wait_queue_head_t wait;
- struct timeval tstart;
+ ktime_t start;
+ u32 lastcwnd;
} tcpw;
+/*
+ * Print to log with timestamps.
+ * FIXME: causes an extra copy
+ */
static void printl(const char *fmt, ...)
{
va_list args;
int len;
- struct timeval now;
+ struct timespec tv;
char tbuf[256];
va_start(args, fmt);
- do_gettimeofday(&now);
+ /* want monotonic time since start of tcp_probe */
+ tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start));
- now.tv_sec -= tcpw.tstart.tv_sec;
- now.tv_usec -= tcpw.tstart.tv_usec;
- if (now.tv_usec < 0) {
- --now.tv_sec;
- now.tv_usec += 1000000;
- }
-
- len = sprintf(tbuf, "%lu.%06lu ",
- (unsigned long) now.tv_sec,
- (unsigned long) now.tv_usec);
+ len = sprintf(tbuf, "%lu.%09lu ",
+ (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec);
len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
va_end(args);
@@ -78,38 +82,44 @@ static void printl(const char *fmt, ...)
wake_up(&tcpw.wait);
}
-static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size)
+/*
+ * Hook inserted to be called before each receive packet.
+ * Note: arguments must match tcp_rcv_established()!
+ *
+ * Logs one line per matching packet: source and destination address:port,
+ * skb length, snd_nxt, snd_una, snd_cwnd, ssthresh, snd_wnd and srtt >> 3.
+ * A packet matches when the port filter is 0 or equals either port, and
+ * either "full" is set or snd_cwnd differs from the last logged value.
+ *
+ * NOTE(review): tcpw.lastcwnd is a single global shared by all sockets, so
+ * with several connections the "cwnd changed" test compares against
+ * whichever socket was logged last.
+ */
+static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ struct tcphdr *th, unsigned len)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
- if (port == 0 || ntohs(inet->dport) == port ||
- ntohs(inet->sport) == port) {
+ /* Only update if port matches */
+ if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port)
+ && (full || tp->snd_cwnd != tcpw.lastcwnd)) {
- printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n",
+ /* One conversion per argument; the trailing %u prints srtt. */
+ printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u %u\n",
NIPQUAD(inet->saddr), ntohs(inet->sport),
NIPQUAD(inet->daddr), ntohs(inet->dport),
- size, tp->snd_nxt, tp->snd_una,
+ skb->len, tp->snd_nxt, tp->snd_una,
tp->snd_cwnd, tcp_current_ssthresh(sk),
- tp->snd_wnd);
+ tp->snd_wnd, tp->srtt >> 3);
+ tcpw.lastcwnd = tp->snd_cwnd;
}
jprobe_return();
return 0;
}
-static struct jprobe tcp_send_probe = {
+static struct jprobe tcp_probe = {
.kp = {
- .symbol_name = "tcp_sendmsg",
+ .symbol_name = "tcp_rcv_established",
},
- .entry = JPROBE_ENTRY(jtcp_sendmsg),
+ .entry = JPROBE_ENTRY(jtcp_rcv_established),
};
static int tcpprobe_open(struct inode * inode, struct file * file)
{
kfifo_reset(tcpw.fifo);
- do_gettimeofday(&tcpw.tstart);
+ tcpw.start = ktime_get();
return 0;
}
@@ -162,7 +172,7 @@ static __init int tcpprobe_init(void)
if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
goto err0;
- ret = register_jprobe(&tcp_send_probe);
+ ret = register_jprobe(&tcp_probe);
if (ret)
goto err1;
@@ -180,7 +190,7 @@ static __exit void tcpprobe_exit(void)
{
kfifo_free(tcpw.fifo);
proc_net_remove(procname);
- unregister_jprobe(&tcp_send_probe);
+ unregister_jprobe(&tcp_probe);
}
module_exit(tcpprobe_exit);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a9243cfc1bea..2ca97b20929d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -233,7 +233,7 @@ static void tcp_probe_timer(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
- if (tp->packets_out || !sk->sk_send_head) {
+ if (tp->packets_out || !tcp_send_head(sk)) {
icsk->icsk_probes_out = 0;
return;
}
@@ -284,7 +284,7 @@ static void tcp_retransmit_timer(struct sock *sk)
if (!tp->packets_out)
goto out;
- BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
+ BUG_TRAP(!tcp_write_queue_empty(sk));
if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
!((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
@@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk)
goto out;
}
tcp_enter_loss(sk, 0);
- tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+ tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
__sk_dst_reset(sk);
goto out_reset_timer;
}
@@ -341,7 +341,7 @@ static void tcp_retransmit_timer(struct sock *sk)
tcp_enter_loss(sk, 0);
}
- if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
+ if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
/* Retransmission failed because of local congestion,
* do not backoff.
*/
@@ -482,7 +482,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = keepalive_time_when(tp);
/* It is alive without keepalive 8) */
- if (tp->packets_out || sk->sk_send_head)
+ if (tp->packets_out || tcp_send_head(sk))
goto resched;
elapsed = tcp_time_stamp - tp->rcv_tstamp;
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 5c484dceb967..73e19cf7df21 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -38,6 +38,8 @@
#include <net/tcp.h>
+#include "tcp_vegas.h"
+
/* Default values of the Vegas variables, in fixed-point representation
* with V_PARAM_SHIFT bits to the right of the binary point.
*/
@@ -54,17 +56,6 @@ module_param(gamma, int, 0644);
MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
-/* Vegas variables */
-struct vegas {
- u32 beg_snd_nxt; /* right edge during last RTT */
- u32 beg_snd_una; /* left edge during last RTT */
- u32 beg_snd_cwnd; /* saves the size of the cwnd */
- u8 doing_vegas_now;/* if true, do vegas for this RTT */
- u16 cntRTT; /* # of RTTs measured within last RTT */
- u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
- u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
-};
-
/* There are several situations when we must "re-start" Vegas:
*
* o when a connection is established
@@ -81,7 +72,7 @@ struct vegas {
* Instead we must wait until the completion of an RTT during
* which we actually receive ACKs.
*/
-static inline void vegas_enable(struct sock *sk)
+static void vegas_enable(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
@@ -104,13 +95,14 @@ static inline void vegas_disable(struct sock *sk)
vegas->doing_vegas_now = 0;
}
-static void tcp_vegas_init(struct sock *sk)
+void tcp_vegas_init(struct sock *sk)
{
struct vegas *vegas = inet_csk_ca(sk);
vegas->baseRTT = 0x7fffffff;
vegas_enable(sk);
}
+EXPORT_SYMBOL_GPL(tcp_vegas_init);
/* Do RTT sampling needed for Vegas.
* Basically we:
@@ -120,10 +112,13 @@ static void tcp_vegas_init(struct sock *sk)
* o min-filter RTT samples from a much longer window (forever for now)
* to find the propagation delay (baseRTT)
*/
-static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct vegas *vegas = inet_csk_ca(sk);
- u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+ u32 vrtt;
+
+ /* Never allow zero rtt or baseRTT */
+ vrtt = ktime_to_us(net_timedelta(last)) + 1;
/* Filter to find propagation delay: */
if (vrtt < vegas->baseRTT)
@@ -135,8 +130,9 @@ static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
vegas->minRTT = min(vegas->minRTT, vrtt);
vegas->cntRTT++;
}
+EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked);
-static void tcp_vegas_state(struct sock *sk, u8 ca_state)
+void tcp_vegas_state(struct sock *sk, u8 ca_state)
{
if (ca_state == TCP_CA_Open)
@@ -144,6 +140,7 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
else
vegas_disable(sk);
}
+EXPORT_SYMBOL_GPL(tcp_vegas_state);
/*
* If the connection is idle and we are restarting,
@@ -154,12 +151,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
* packets, _then_ we can make Vegas calculations
* again.
*/
-static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_CWND_RESTART ||
event == CA_EVENT_TX_START)
tcp_vegas_init(sk);
}
+EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
u32 seq_rtt, u32 in_flight, int flag)
@@ -336,30 +334,29 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
}
/* Extract info for Tcp socket info provided via netlink. */
-static void tcp_vegas_get_info(struct sock *sk, u32 ext,
- struct sk_buff *skb)
+void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
{
const struct vegas *ca = inet_csk_ca(sk);
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct tcpvegas_info *info;
-
- info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
- sizeof(*info)));
-
- info->tcpv_enabled = ca->doing_vegas_now;
- info->tcpv_rttcnt = ca->cntRTT;
- info->tcpv_rtt = ca->baseRTT;
- info->tcpv_minrtt = ca->minRTT;
- rtattr_failure: ;
+ struct tcpvegas_info info = {
+ .tcpv_enabled = ca->doing_vegas_now,
+ .tcpv_rttcnt = ca->cntRTT,
+ .tcpv_rtt = ca->baseRTT,
+ .tcpv_minrtt = ca->minRTT,
+ };
+
+ nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
}
}
+EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
static struct tcp_congestion_ops tcp_vegas = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_vegas_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_vegas_cong_avoid,
.min_cwnd = tcp_reno_min_cwnd,
- .rtt_sample = tcp_vegas_rtt_calc,
+ .pkts_acked = tcp_vegas_pkts_acked,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
.get_info = tcp_vegas_get_info,
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
new file mode 100644
index 000000000000..502fa8183634
--- /dev/null
+++ b/net/ipv4/tcp_vegas.h
@@ -0,0 +1,24 @@
+/*
+ * TCP Vegas congestion control interface
+ *
+ * Declares the Vegas per-connection state and the callbacks implemented
+ * in tcp_vegas.c so that other congestion control modules can embed the
+ * state and reuse the callbacks.
+ */
+#ifndef __TCP_VEGAS_H
+#define __TCP_VEGAS_H 1
+
+/* Vegas variables */
+struct vegas {
+ u32 beg_snd_nxt; /* right edge during last RTT */
+ u32 beg_snd_una; /* left edge during last RTT */
+ u32 beg_snd_cwnd; /* saves the size of the cwnd */
+ u8 doing_vegas_now;/* if true, do vegas for this RTT */
+ u16 cntRTT; /* # of RTTs measured within last RTT */
+ u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
+ u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
+};
+/*
+ * tcp_vegas_init()       - reset baseRTT to 0x7fffffff and enable Vegas.
+ * tcp_vegas_state()      - disable Vegas unless ca_state is TCP_CA_Open.
+ * tcp_vegas_pkts_acked() - take an RTT sample from 'last' and update
+ *                          baseRTT, minRTT and cntRTT.
+ * tcp_vegas_cwnd_event() - re-run tcp_vegas_init() on CA_EVENT_CWND_RESTART
+ *                          and CA_EVENT_TX_START.
+ * tcp_vegas_get_info()   - add an INET_DIAG_VEGASINFO attribute to skb when
+ *                          the matching bit is set in ext.
+ */
+extern void tcp_vegas_init(struct sock *sk);
+extern void tcp_vegas_state(struct sock *sk, u8 ca_state);
+extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last);
+extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
+extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+
+#endif /* __TCP_VEGAS_H */
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index ce57bf302f6c..9edb340f2f95 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk)
}
/* Do rtt sampling needed for Veno. */
-static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct veno *veno = inet_csk_ca(sk);
- u32 vrtt = usrtt + 1; /* Never allow zero rtt or basertt */
+ u32 vrtt;
+
+ /* Never allow zero rtt or baseRTT */
+ vrtt = ktime_to_us(net_timedelta(last)) + 1;
/* Filter to find propagation delay: */
if (vrtt < veno->basertt)
@@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
}
static struct tcp_congestion_ops tcp_veno = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_veno_init,
.ssthresh = tcp_veno_ssthresh,
.cong_avoid = tcp_veno_cong_avoid,
- .rtt_sample = tcp_veno_rtt_calc,
+ .pkts_acked = tcp_veno_pkts_acked,
.set_state = tcp_veno_state,
.cwnd_event = tcp_veno_cwnd_event,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4e1b61032a9c..e61e09dd513e 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta)
* Called after processing group of packets.
* but all westwood needs is the last sample of srtt.
*/
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
+static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct westwood *w = inet_csk_ca(sk);
if (cnt > 0)
@@ -226,7 +226,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
- switch(event) {
+ switch (event) {
case CA_EVENT_FAST_ACK:
westwood_fast_bw(sk);
break;
@@ -260,16 +260,13 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
{
const struct westwood *ca = inet_csk_ca(sk);
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct rtattr *rta;
- struct tcpvegas_info *info;
-
- rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info));
- info = RTA_DATA(rta);
- info->tcpv_enabled = 1;
- info->tcpv_rttcnt = 0;
- info->tcpv_rtt = jiffies_to_usecs(ca->rtt);
- info->tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
- rtattr_failure: ;
+ struct tcpvegas_info info = {
+ .tcpv_enabled = 1,
+ .tcpv_rtt = jiffies_to_usecs(ca->rtt),
+ .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
+ };
+
+ nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
}
}
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
new file mode 100644
index 000000000000..545ed237ab53
--- /dev/null
+++ b/net/ipv4/tcp_yeah.c
@@ -0,0 +1,268 @@
+/*
+ *
+ * YeAH TCP
+ *
+ * For further details look at:
+ * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+ *
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
+
+#include <net/tcp.h>
+
+#include "tcp_vegas.h"
+
+#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck
+#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt
+#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss
+#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion
+#define TCP_YEAH_PHY 8 //lin maximum delta from base
+#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss
+#define TCP_YEAH_ZETA 50 //lin minimum number of state switches to reset reno_count
+
+#define TCP_SCALABLE_AI_CNT 100U
+
+/* YeAH variables */
+struct yeah {
+ struct vegas vegas; /* must be first: the tcp_vegas_*() helpers read
+ * inet_csk_ca(sk) as a struct vegas */
+
+ /* YeAH */
+ u32 lastQ; /* queue estimate from the latest per-RTT calculation */
+ u32 doing_reno_now; /* 0: Scalable growth; otherwise Reno growth, counting
+ * consecutive congested RTTs (capped at 0xffffff) */
+
+ u32 reno_count; /* floor applied to cwnd after a queue-based reduction */
+ u32 fast_count; /* uncongested RTTs seen; reset past TCP_YEAH_ZETA */
+
+ u32 pkts_acked; /* count from the last pkts_acked hook in TCP_CA_Open */
+};
+
+/*
+ * Initialise the per-connection YeAH state.
+ *
+ * Sets up the embedded Vegas state through tcp_vegas_init(), then resets
+ * every YeAH-specific field so that no counter is read before it has been
+ * written, and finally caps snd_cwnd_clamp.
+ */
+static void tcp_yeah_init(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+
+	tcp_vegas_init(sk);
+
+	yeah->doing_reno_now = 0;
+	yeah->lastQ = 0;
+
+	yeah->reno_count = 2;
+
+	/* fast_count is incremented and pkts_acked is added to snd_cwnd_cnt
+	 * in tcp_yeah_cong_avoid(), so give both a defined starting value.
+	 * NOTE(review): presumably the private area can hold stale data when
+	 * the congestion control is switched on a live socket - confirm.
+	 */
+	yeah->fast_count = 0;
+	yeah->pkts_acked = 0;
+
+	/* Ensure the MD arithmetic works. This is somewhat pedantic,
+	 * since I don't think we will see a cwnd this large. :) */
+	tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
+}
+
+
+/*
+ * pkts_acked hook: remember how many packets this ACK covered while the
+ * connection is in TCP_CA_Open, then hand the sample to the Vegas RTT
+ * bookkeeping.
+ */
+static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
+{
+	struct yeah *ca = inet_csk_ca(sk);
+
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open) {
+		/* Only counts seen in the Open state feed the cwnd growth. */
+		ca->pkts_acked = pkts_acked;
+	}
+
+	tcp_vegas_pkts_acked(sk, pkts_acked, last);
+}
+
+/*
+ * Congestion avoidance hook.
+ *
+ * On every ACK, while the connection is cwnd-limited, grow cwnd using slow
+ * start, the "Scalable" rule (doing_reno_now == 0) or the Reno rule.
+ * Once per RTT (when 'ack' passes vegas.beg_snd_nxt) and with more than
+ * two RTT samples, estimate the bottleneck queue from the Vegas RTT data,
+ * optionally shrink cwnd, and choose the growth rule for the next RTT.
+ *
+ * @sk:        socket being updated
+ * @ack:       sequence number compared against vegas.beg_snd_nxt
+ * @seq_rtt:   unused here
+ * @in_flight: passed to tcp_is_cwnd_limited()
+ * @flag:      unused here
+ */
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
+				u32 seq_rtt, u32 in_flight, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else if (!yeah->doing_reno_now) {
+		/* Scalable */
+
+		tp->snd_cwnd_cnt += yeah->pkts_acked;
+		if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		}
+
+		yeah->pkts_acked = 1;
+
+	} else {
+		/* Reno */
+
+		if (tp->snd_cwnd_cnt < tp->snd_cwnd)
+			tp->snd_cwnd_cnt++;
+
+		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+			/* Honour the clamp here too, as the Scalable
+			 * branch does, so cwnd never exceeds it.
+			 */
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		}
+	}
+
+	/* The key players are vegas.beg_snd_una and vegas.beg_snd_nxt.
+	 *
+	 * These are so named because they represent the approximate values
+	 * of snd_una and snd_nxt at the beginning of the current RTT. More
+	 * precisely, they represent the amount of data sent during the RTT.
+	 * At the end of the RTT, when we receive an ACK for vegas.beg_snd_nxt,
+	 * we will calculate that (vegas.beg_snd_nxt - vegas.beg_snd_una)
+	 * outstanding bytes of data have been ACKed during the course of the
+	 * RTT, giving an "actual" rate of:
+	 *
+	 * (vegas.beg_snd_nxt - vegas.beg_snd_una) / (rtt duration)
+	 *
+	 * Unfortunately, vegas.beg_snd_una is not exactly equal to snd_una,
+	 * because delayed ACKs can cover more than one segment, so they
+	 * don't line up nicely with the boundaries of RTTs.
+	 *
+	 * Another unfortunate fact of life is that delayed ACKs delay the
+	 * advance of the left edge of our send window, so that the number
+	 * of bytes we send in an RTT is often less than our cwnd will allow.
+	 * So we keep track of our cwnd separately, in vegas.beg_snd_cwnd.
+	 */
+
+	if (after(ack, yeah->vegas.beg_snd_nxt)) {
+
+		/* We do the Vegas calculations only if we got enough RTT
+		 * samples that we can be reasonably sure that we got
+		 * at least one RTT sample that wasn't from a delayed ACK.
+		 * If we only had 2 samples total,
+		 * then that means we're getting only 1 ACK per RTT, which
+		 * means they're almost certainly delayed ACKs.
+		 * If we have 3 samples, we should be OK.
+		 */
+
+		if (yeah->vegas.cntRTT > 2) {
+			u32 rtt, queue;
+			u64 bw;
+
+			/* We have enough RTT samples, so, using the Vegas
+			 * algorithm, we determine if we should increase or
+			 * decrease cwnd, and by how much.
+			 */
+
+			/* Pluck out the RTT we are using for the Vegas
+			 * calculations. This is the min RTT seen during the
+			 * last RTT. Taking the min filters out the effects
+			 * of delayed ACKs, at the cost of noticing congestion
+			 * a bit later.
+			 */
+			rtt = yeah->vegas.minRTT;
+
+			/* Compute excess number of packets above bandwidth
+			 * Avoid doing full 64 bit divide.
+			 */
+			bw = tp->snd_cwnd;
+			bw *= rtt - yeah->vegas.baseRTT;
+			do_div(bw, rtt);
+			queue = bw;
+
+			if (queue > TCP_YEAH_ALPHA ||
+			    rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
+				if (queue > TCP_YEAH_ALPHA
+				    && tp->snd_cwnd > yeah->reno_count) {
+					u32 reduction = min(queue / TCP_YEAH_GAMMA ,
+							    tp->snd_cwnd >> TCP_YEAH_EPSILON);
+
+					tp->snd_cwnd -= reduction;
+
+					tp->snd_cwnd = max(tp->snd_cwnd,
+							   yeah->reno_count);
+
+					tp->snd_ssthresh = tp->snd_cwnd;
+				}
+
+				if (yeah->reno_count <= 2)
+					yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
+				else
+					yeah->reno_count++;
+
+				yeah->doing_reno_now = min(yeah->doing_reno_now + 1,
+							   0xffffffU);
+			} else {
+				yeah->fast_count++;
+
+				if (yeah->fast_count > TCP_YEAH_ZETA) {
+					yeah->reno_count = 2;
+					yeah->fast_count = 0;
+				}
+
+				yeah->doing_reno_now = 0;
+			}
+
+			yeah->lastQ = queue;
+
+		}
+
+		/* Save the extent of the current window so we can use this
+		 * at the end of the next RTT.
+		 */
+		yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt;
+		yeah->vegas.beg_snd_nxt = tp->snd_nxt;
+		yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
+
+		/* Wipe the slate clean for the next RTT. */
+		yeah->vegas.cntRTT = 0;
+		yeah->vegas.minRTT = 0x7fffffff;
+	}
+}
+
+/*
+ * ssthresh hook: compute the new slow start threshold.
+ *
+ * While fewer than TCP_YEAH_RHO consecutive congested RTTs have been
+ * counted, the reduction is the last queue estimate, bounded above by
+ * max(cwnd/2, 2) and below by cwnd >> TCP_YEAH_DELTA. Otherwise it is
+ * max(cwnd/2, 2). Also resets fast_count and halves reno_count (minimum 2).
+ *
+ * Returns cwnd minus the reduction, never less than 2.
+ */
+static u32 tcp_yeah_ssthresh(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+	u32 reduction;
+
+	if (yeah->doing_reno_now < TCP_YEAH_RHO) {
+		reduction = yeah->lastQ;
+
+		reduction = min(reduction, max(tp->snd_cwnd >> 1, 2U));
+
+		reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
+	} else
+		reduction = max(tp->snd_cwnd >> 1, 2U);
+
+	yeah->fast_count = 0;
+	yeah->reno_count = max(yeah->reno_count >> 1, 2U);
+
+	/* reduction is at least 2 in some cases, so for a cwnd of 1 the
+	 * unsigned subtraction would wrap to a huge value, and for a cwnd
+	 * of 2 it would yield 0. Compare as signed and keep a floor of 2.
+	 */
+	return max_t(int, tp->snd_cwnd - reduction, 2);
+}
+
+/*
+ * Congestion control operations for YeAH. The state, cwnd-event and
+ * diagnostic hooks are the Vegas ones; init, ssthresh, cong_avoid and
+ * pkts_acked are YeAH-specific.
+ */
+static struct tcp_congestion_ops tcp_yeah = {
+	.name		= "yeah",
+	.owner		= THIS_MODULE,
+	.flags		= TCP_CONG_RTT_STAMP,
+
+	/* YeAH-specific hooks */
+	.init		= tcp_yeah_init,
+	.pkts_acked	= tcp_yeah_pkts_acked,
+	.cong_avoid	= tcp_yeah_cong_avoid,
+	.ssthresh	= tcp_yeah_ssthresh,
+
+	/* Hooks reused from Reno and Vegas */
+	.min_cwnd	= tcp_reno_min_cwnd,
+	.set_state	= tcp_vegas_state,
+	.cwnd_event	= tcp_vegas_cwnd_event,
+	.get_info	= tcp_vegas_get_info,
+};
+
+/*
+ * Module init: check that struct yeah fits in the per-socket congestion
+ * control private area, then register the algorithm.
+ *
+ * Returns the result of tcp_register_congestion_control() so that a failed
+ * registration fails the module load instead of leaving a loaded module
+ * whose exit handler would unregister something never registered.
+ */
+static int __init tcp_yeah_register(void)
+{
+	BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
+	return tcp_register_congestion_control(&tcp_yeah);
+}
+
+/* Module exit: remove the algorithm from the congestion control list. */
+static void __exit tcp_yeah_unregister(void)
+{
+	tcp_unregister_congestion_control(&tcp_yeah);
+}
+
+module_init(tcp_yeah_register);
+module_exit(tcp_yeah_unregister);
+
+MODULE_AUTHOR("Angelo P. Castellani");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("YeAH TCP");
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fc620a7c1db4..113e0c4c8a92 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,14 +114,33 @@ DEFINE_RWLOCK(udp_hash_lock);
static int udp_port_rover;
-static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
+/*
+ * Combine a port number and an IPv4 address into one hash value.
+ *
+ * The address is folded onto itself twice (by 16 bits, then by 8 bits)
+ * and the result is XORed with the port.
+ *
+ * Note: the typical case is probably addr == 0, in which case the
+ * fold contributes nothing and only the port varies the hash.
+ */
+static inline unsigned int hash_port_and_addr(__u16 port, __be32 addr)
+{
+	__be32 folded = addr ^ (addr >> 16);
+
+	folded ^= folded >> 8;
+
+	return port ^ folded;
+}
+
+static inline int __udp_lib_port_inuse(unsigned int hash, int port,
+ __be32 daddr, struct hlist_head udptable[])
{
struct sock *sk;
struct hlist_node *node;
+ struct inet_sock *inet;
- sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
- if (sk->sk_hash == num)
+ sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
+ if (sk->sk_hash != hash)
+ continue;
+ inet = inet_sk(sk);
+ if (inet->num != port)
+ continue;
+ if (inet->rcv_saddr == daddr)
return 1;
+ }
return 0;
}
@@ -142,6 +161,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
struct hlist_node *node;
struct hlist_head *head;
struct sock *sk2;
+ unsigned int hash;
int error = 1;
write_lock_bh(&udp_hash_lock);
@@ -156,7 +176,9 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
int size;
- head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
+ hash = hash_port_and_addr(result,
+ inet_sk(sk)->rcv_saddr);
+ head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
if (hlist_empty(head)) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0] +
@@ -175,12 +197,23 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
;
}
result = best;
- for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
+ for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE;
+ i++, result += UDP_HTABLE_SIZE) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0]
+ ((result - sysctl_local_port_range[0]) &
(UDP_HTABLE_SIZE - 1));
- if (! __udp_lib_lport_inuse(result, udptable))
+ hash = hash_port_and_addr(result, 0);
+ if (__udp_lib_port_inuse(hash, result,
+ 0, udptable))
+ continue;
+ if (!inet_sk(sk)->rcv_saddr)
+ break;
+
+ hash = hash_port_and_addr(result,
+ inet_sk(sk)->rcv_saddr);
+ if (! __udp_lib_port_inuse(hash, result,
+ inet_sk(sk)->rcv_saddr, udptable))
break;
}
if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -188,21 +221,41 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
gotit:
*port_rover = snum = result;
} else {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ hash = hash_port_and_addr(snum, 0);
+ head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
sk_for_each(sk2, node, head)
- if (sk2->sk_hash == snum &&
- sk2 != sk &&
- (!sk2->sk_reuse || !sk->sk_reuse) &&
- (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
- || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (*saddr_comp)(sk, sk2) )
+ if (sk2->sk_hash == hash &&
+ sk2 != sk &&
+ inet_sk(sk2)->num == snum &&
+ (!sk2->sk_reuse || !sk->sk_reuse) &&
+ (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
+ sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ (*saddr_comp)(sk, sk2))
goto fail;
+
+ if (inet_sk(sk)->rcv_saddr) {
+ hash = hash_port_and_addr(snum,
+ inet_sk(sk)->rcv_saddr);
+ head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+
+ sk_for_each(sk2, node, head)
+ if (sk2->sk_hash == hash &&
+ sk2 != sk &&
+ inet_sk(sk2)->num == snum &&
+ (!sk2->sk_reuse || !sk->sk_reuse) &&
+ (!sk2->sk_bound_dev_if ||
+ !sk->sk_bound_dev_if ||
+ sk2->sk_bound_dev_if ==
+ sk->sk_bound_dev_if) &&
+ (*saddr_comp)(sk, sk2))
+ goto fail;
+ }
}
inet_sk(sk)->num = snum;
- sk->sk_hash = snum;
+ sk->sk_hash = hash;
if (sk_unhashed(sk)) {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
sk_add_node(sk, head);
sock_prot_inc_use(sk->sk_prot);
}
@@ -212,13 +265,13 @@ fail:
return error;
}
-__inline__ int udp_get_port(struct sock *sk, unsigned short snum,
+int udp_get_port(struct sock *sk, unsigned short snum,
int (*scmp)(const struct sock *, const struct sock *))
{
return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
}
-inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -241,63 +294,77 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
- unsigned short hnum = ntohs(dport);
- int badness = -1;
+ unsigned int hash, hashwild;
+ int score, best = -1, hport = ntohs(dport);
+
+ hash = hash_port_and_addr(hport, daddr);
+ hashwild = hash_port_and_addr(hport, 0);
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+
+lookup:
+
+ sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
- if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
- int score = (sk->sk_family == PF_INET ? 1 : 0);
- if (inet->rcv_saddr) {
- if (inet->rcv_saddr != daddr)
- continue;
- score+=2;
- }
- if (inet->daddr) {
- if (inet->daddr != saddr)
- continue;
- score+=2;
- }
- if (inet->dport) {
- if (inet->dport != sport)
- continue;
- score+=2;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- continue;
- score+=2;
- }
- if(score == 9) {
- result = sk;
- break;
- } else if(score > badness) {
- result = sk;
- badness = score;
- }
+ if (sk->sk_hash != hash || ipv6_only_sock(sk) ||
+ inet->num != hport)
+ continue;
+
+ score = (sk->sk_family == PF_INET ? 1 : 0);
+ if (inet->rcv_saddr) {
+ if (inet->rcv_saddr != daddr)
+ continue;
+ score+=2;
+ }
+ if (inet->daddr) {
+ if (inet->daddr != saddr)
+ continue;
+ score+=2;
}
+ if (inet->dport) {
+ if (inet->dport != sport)
+ continue;
+ score+=2;
+ }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ continue;
+ score+=2;
+ }
+ if (score == 9) {
+ result = sk;
+ goto found;
+ } else if (score > best) {
+ result = sk;
+ best = score;
+ }
+ }
+
+ if (hash != hashwild) {
+ hash = hashwild;
+ goto lookup;
}
+found:
if (result)
sock_hold(result);
read_unlock(&udp_hash_lock);
return result;
}
-static inline struct sock *udp_v4_mcast_next(struct sock *sk,
- __be16 loc_port, __be32 loc_addr,
+static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum,
+ int hport, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
int dif)
{
struct hlist_node *node;
struct sock *s = sk;
- unsigned short hnum = ntohs(loc_port);
sk_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
if (s->sk_hash != hnum ||
+ inet->num != hport ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
(inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
@@ -329,8 +396,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
struct inet_sock *inet;
struct iphdr *iph = (struct iphdr*)skb->data;
struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct sock *sk;
int harderr;
int err;
@@ -390,7 +457,7 @@ out:
sock_put(sk);
}
-__inline__ void udp_err(struct sk_buff *skb, u32 info)
+void udp_err(struct sk_buff *skb, u32 info)
{
return __udp4_lib_err(skb, info, udp_hash);
}
@@ -419,13 +486,14 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, int len )
{
unsigned int offset;
- struct udphdr *uh = skb->h.uh;
+ struct udphdr *uh = udp_hdr(skb);
__wsum csum = 0;
if (skb_queue_len(&sk->sk_write_queue) == 1) {
/*
* Only one fragment on the socket.
*/
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
} else {
@@ -434,7 +502,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
* fragments on the socket so that all csums of sk_buffs
* should be together
*/
- offset = skb->h.raw - skb->data;
+ offset = skb_transport_offset(skb);
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
skb->ip_summed = CHECKSUM_NONE;
@@ -469,7 +537,7 @@ static int udp_push_pending_frames(struct sock *sk)
/*
* Create a UDP header
*/
- uh = skb->h.uh;
+ uh = udp_hdr(skb);
uh->source = fl->fl_ip_sport;
uh->dest = fl->fl_ip_dport;
uh->len = htons(up->len);
@@ -765,38 +833,38 @@ out:
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
- switch(cmd)
+ switch (cmd) {
+ case SIOCOUTQ:
{
- case SIOCOUTQ:
- {
- int amount = atomic_read(&sk->sk_wmem_alloc);
- return put_user(amount, (int __user *)arg);
- }
+ int amount = atomic_read(&sk->sk_wmem_alloc);
+ return put_user(amount, (int __user *)arg);
+ }
- case SIOCINQ:
- {
- struct sk_buff *skb;
- unsigned long amount;
-
- amount = 0;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb != NULL) {
- /*
- * We will only return the amount
- * of this packet since that is all
- * that will be read.
- */
- amount = skb->len - sizeof(struct udphdr);
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- return put_user(amount, (int __user *)arg);
+ case SIOCINQ:
+ {
+ struct sk_buff *skb;
+ unsigned long amount;
+
+ amount = 0;
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb != NULL) {
+ /*
+ * We will only return the amount
+ * of this packet since that is all
+ * that will be read.
+ */
+ amount = skb->len - sizeof(struct udphdr);
}
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ return put_user(amount, (int __user *)arg);
+ }
- default:
- return -ENOIOCTLCMD;
+ default:
+ return -ENOIOCTLCMD;
}
- return(0);
+
+ return 0;
}
/*
@@ -810,7 +878,9 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
- int copied, err, copy_only, is_udplite = IS_UDPLITE(sk);
+ unsigned int ulen, copied;
+ int err;
+ int is_udplite = IS_UDPLITE(sk);
/*
* Check any passed addresses
@@ -826,28 +896,25 @@ try_again:
if (!skb)
goto out;
- copied = skb->len - sizeof(struct udphdr);
- if (copied > len) {
- copied = len;
+ ulen = skb->len - sizeof(struct udphdr);
+ copied = len;
+ if (copied > ulen)
+ copied = ulen;
+ else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
- }
/*
- * Decide whether to checksum and/or copy data.
- *
- * UDP: checksum may have been computed in HW,
- * (re-)compute it if message is truncated.
- * UDP-Lite: always needs to checksum, no HW support.
+ * If checksum is needed at all, try to do it while copying the
+ * data. If the data is truncated, or if we only want a partial
+ * coverage checksum (UDP-Lite), do it before the copy.
*/
- copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
- if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
- if (__udp_lib_checksum_complete(skb))
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
- copy_only = 1;
}
- if (copy_only)
+ if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov, copied );
else {
@@ -866,8 +933,8 @@ try_again:
if (sin)
{
sin->sin_family = AF_INET;
- sin->sin_port = skb->h.uh->source;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_port = udp_hdr(skb)->source;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
}
if (inet->cmsg_flags)
@@ -875,7 +942,7 @@ try_again:
err = copied;
if (flags & MSG_TRUNC)
- err = skb->len - sizeof(struct udphdr);
+ err = ulen;
out_free:
skb_free_datagram(sk, skb);
@@ -949,7 +1016,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
return 1;
/* Now we can get the pointers */
- uh = skb->h.uh;
+ uh = udp_hdr(skb);
udpdata = (__u8 *)uh + sizeof(struct udphdr);
udpdata32 = (__be32 *)udpdata;
@@ -959,7 +1026,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
return 0;
- } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0 ) {
+ } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr);
} else
@@ -990,7 +1057,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
return 0;
/* Now we can update and verify the packet length... */
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iphlen = iph->ihl << 2;
iph->tot_len = htons(ntohs(iph->tot_len) - len);
if (skb->len < iphlen + len) {
@@ -1002,7 +1069,8 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
* transport header to point to ESP. Keep UDP on the stack
* for later.
*/
- skb->h.raw = skb_pull(skb, len);
+ __skb_pull(skb, len);
+ skb_reset_transport_header(skb);
/* modify the protocol (it's ESP!) */
iph->protocol = IPPROTO_ESP;
@@ -1095,10 +1163,9 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
}
}
- if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
- if (__udp_lib_checksum_complete(skb))
+ if (sk->sk_filter) {
+ if (udp_lib_checksum_complete(skb))
goto drop;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
@@ -1128,33 +1195,49 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct hlist_head udptable[])
{
- struct sock *sk;
+ struct sock *sk, *skw, *sknext;
int dif;
+ int hport = ntohs(uh->dest);
+ unsigned int hash = hash_port_and_addr(hport, daddr);
+ unsigned int hashwild = hash_port_and_addr(hport, 0);
- read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
dif = skb->dev->ifindex;
- sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
- if (sk) {
- struct sock *sknext = NULL;
+ read_lock(&udp_hash_lock);
+
+ sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]);
+ skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]);
+
+ sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif);
+ if (!sk) {
+ hash = hashwild;
+ sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source,
+ saddr, dif);
+ }
+ if (sk) {
do {
struct sk_buff *skb1 = skb;
-
- sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
- if(sknext)
+ sknext = udp_v4_mcast_next(sk_next(sk), hash, hport,
+ daddr, uh->source, saddr, dif);
+ if (!sknext && hash != hashwild) {
+ hash = hashwild;
+ sknext = udp_v4_mcast_next(skw, hash, hport,
+ daddr, uh->source, saddr, dif);
+ }
+ if (sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);
- if(skb1) {
+ if (skb1) {
int ret = udp_queue_rcv_skb(sk, skb1);
if (ret > 0)
- /* we should probably re-process instead
- * of dropping packets here. */
+ /*
+ * we should probably re-process
+ * instead of dropping packets here.
+ */
kfree_skb(skb1);
}
sk = sknext;
- } while(sknext);
+ } while (sknext);
} else
kfree_skb(skb);
read_unlock(&udp_hash_lock);
@@ -1166,25 +1249,37 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
* Otherwise, csum completion requires chacksumming packet body,
* including udp header and folding it to skb->csum.
*/
-static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
+static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
+ int proto)
{
+ const struct iphdr *iph;
+ int err;
+
+ UDP_SKB_CB(skb)->partial_cov = 0;
+ UDP_SKB_CB(skb)->cscov = skb->len;
+
+ if (proto == IPPROTO_UDPLITE) {
+ err = udplite_checksum_init(skb, uh);
+ if (err)
+ return err;
+ }
+
+ iph = ip_hdr(skb);
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
- skb->len, IPPROTO_UDP, skb->csum ))
+ if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+ proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
- skb->nh.iph->daddr,
- skb->len, IPPROTO_UDP, 0);
+ if (!skb_csum_unnecessary(skb))
+ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ skb->len, proto, 0);
/* Probably, we should checksum udp header (it should be in cache
* in any case) and data in tiny packets (< rx copybreak).
*/
- /* UDP = UDP-Lite with a non-partial checksum coverage */
- UDP_SKB_CB(skb)->partial_cov = 0;
+ return 0;
}
/*
@@ -1192,14 +1287,14 @@ static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
*/
int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
- int is_udplite)
+ int proto)
{
struct sock *sk;
- struct udphdr *uh = skb->h.uh;
+ struct udphdr *uh = udp_hdr(skb);
unsigned short ulen;
struct rtable *rt = (struct rtable*)skb->dst;
- __be32 saddr = skb->nh.iph->saddr;
- __be32 daddr = skb->nh.iph->daddr;
+ __be32 saddr = ip_hdr(skb)->saddr;
+ __be32 daddr = ip_hdr(skb)->daddr;
/*
* Validate the packet.
@@ -1211,24 +1306,21 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (ulen > skb->len)
goto short_packet;
- if(! is_udplite ) { /* UDP validates ulen. */
-
+ if (proto == IPPROTO_UDP) {
+ /* UDP validates ulen. */
if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
goto short_packet;
- uh = skb->h.uh;
-
- udp4_csum_init(skb, uh);
-
- } else { /* UDP-Lite validates cscov. */
- if (udplite4_csum_init(skb, uh))
- goto csum_error;
+ uh = udp_hdr(skb);
}
- if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
+ if (udp4_csum_init(skb, uh, proto))
+ goto csum_error;
+
+ if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
- skb->dev->ifindex, udptable );
+ skb->dev->ifindex, udptable);
if (sk != NULL) {
int ret = udp_queue_rcv_skb(sk, skb);
@@ -1250,7 +1342,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (udp_lib_checksum_complete(skb))
goto csum_error;
- UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
+ UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/*
@@ -1258,11 +1350,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
* don't wanna listen. Ignore it.
*/
kfree_skb(skb);
- return(0);
+ return 0;
short_packet:
LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
- is_udplite? "-Lite" : "",
+ proto == IPPROTO_UDPLITE ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
ulen,
@@ -1277,21 +1369,21 @@ csum_error:
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
- is_udplite? "-Lite" : "",
+ proto == IPPROTO_UDPLITE ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
NIPQUAD(daddr),
ntohs(uh->dest),
ulen);
drop:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
- return(0);
+ return 0;
}
-__inline__ int udp_rcv(struct sk_buff *skb)
+int udp_rcv(struct sk_buff *skb)
{
- return __udp4_lib_rcv(skb, udp_hash, 0);
+ return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
int udp_destroy_sock(struct sock *sk)
@@ -1313,13 +1405,13 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
int val;
int err = 0;
- if(optlen<sizeof(int))
+ if (optlen<sizeof(int))
return -EINVAL;
if (get_user(val, (int __user *)optval))
return -EFAULT;
- switch(optname) {
+ switch (optname) {
case UDP_CORK:
if (val != 0) {
up->corkflag = 1;
@@ -1373,7 +1465,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
default:
err = -ENOPROTOOPT;
break;
- };
+ }
return err;
}
@@ -1404,15 +1496,15 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
struct udp_sock *up = udp_sk(sk);
int val, len;
- if(get_user(len,optlen))
+ if (get_user(len,optlen))
return -EFAULT;
len = min_t(unsigned int, len, sizeof(int));
- if(len < 0)
+ if (len < 0)
return -EINVAL;
- switch(optname) {
+ switch (optname) {
case UDP_CORK:
val = up->corkflag;
break;
@@ -1433,11 +1525,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
default:
return -ENOPROTOOPT;
- };
+ }
- if(put_user(len, optlen))
+ if (put_user(len, optlen))
return -EFAULT;
- if(copy_to_user(optval, &val,len))
+ if (copy_to_user(optval, &val,len))
return -EFAULT;
return 0;
}
@@ -1486,15 +1578,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
struct sk_buff *skb;
spin_lock_bh(&rcvq->lock);
- while ((skb = skb_peek(rcvq)) != NULL) {
- if (udp_lib_checksum_complete(skb)) {
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
- __skb_unlink(skb, rcvq);
- kfree_skb(skb);
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
+ while ((skb = skb_peek(rcvq)) != NULL &&
+ udp_lib_checksum_complete(skb)) {
+ UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
+ __skb_unlink(skb, rcvq);
+ kfree_skb(skb);
}
spin_unlock_bh(&rcvq->lock);
@@ -1573,7 +1661,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
struct sock *sk = udp_get_first(seq);
if (sk)
- while(pos && (sk = udp_get_next(seq, sk)) != NULL)
+ while (pos && (sk = udp_get_next(seq, sk)) != NULL)
--pos;
return pos ? NULL : sk;
}
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index b28fe1edf98b..f34fd686a8f1 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -31,7 +31,7 @@ static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
static int udplite_rcv(struct sk_buff *skb)
{
- return __udp4_lib_rcv(skb, udplite_hash, 1);
+ return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
}
static void udplite_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 78e80deb7e89..5ceca951d73f 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -28,7 +28,7 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
switch (nexthdr) {
case IPPROTO_IPIP:
case IPPROTO_IPV6:
- *spi = skb->nh.iph->saddr;
+ *spi = ip_hdr(skb)->saddr;
*seq = 0;
return 0;
}
@@ -39,9 +39,9 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
#ifdef CONFIG_NETFILTER
static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
{
- struct iphdr *iph = skb->nh.iph;
-
if (skb->dst == NULL) {
+ const struct iphdr *iph = ip_hdr(skb);
+
if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
skb->dev))
goto drop;
@@ -55,18 +55,18 @@ drop:
int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
{
- int err;
__be32 spi, seq;
struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
struct xfrm_state *x;
int xfrm_nr = 0;
int decaps = 0;
+ int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
- if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0)
+ if (err != 0)
goto drop;
do {
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
if (xfrm_nr == XFRM_MAX_DEPTH)
goto drop;
@@ -113,7 +113,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
break;
}
- if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0)
+ err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
+ if (err < 0)
goto drop;
} while (!err);
@@ -146,15 +147,15 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
return 0;
} else {
#ifdef CONFIG_NETFILTER
- __skb_push(skb, skb->data - skb->nh.raw);
- skb->nh.iph->tot_len = htons(skb->len);
- ip_send_check(skb->nh.iph);
+ __skb_push(skb, skb->data - skb_network_header(skb));
+ ip_hdr(skb)->tot_len = htons(skb->len);
+ ip_send_check(ip_hdr(skb));
NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
xfrm4_rcv_encap_finish);
return 0;
#else
- return -skb->nh.iph->protocol;
+ return -ip_hdr(skb)->protocol;
#endif
}
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 89cf59ea7bbe..a73e710740c2 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -29,32 +29,34 @@
*/
static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph, *top_iph = NULL;
+ struct iphdr *iph, *top_iph;
int hdrlen, optlen;
- iph = skb->nh.iph;
- skb->h.ipiph = iph;
+ iph = ip_hdr(skb);
+ skb->transport_header = skb->network_header;
hdrlen = 0;
optlen = iph->ihl * 4 - sizeof(*iph);
if (unlikely(optlen))
hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
- skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen);
- top_iph = skb->nh.iph;
- hdrlen = iph->ihl * 4 - optlen;
- skb->h.raw += hdrlen;
+ skb_push(skb, x->props.header_len - IPV4_BEET_PHMAXLEN + hdrlen);
+ skb_reset_network_header(skb);
+ top_iph = ip_hdr(skb);
+ skb->transport_header += sizeof(*iph) - hdrlen;
- memmove(top_iph, iph, hdrlen);
+ memmove(top_iph, iph, sizeof(*iph));
if (unlikely(optlen)) {
struct ip_beet_phdr *ph;
BUG_ON(optlen < 0);
- ph = (struct ip_beet_phdr *)skb->h.raw;
+ ph = (struct ip_beet_phdr *)skb_transport_header(skb);
ph->padlen = 4 - (optlen & 4);
- ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8;
+ ph->hdrlen = optlen / 8;
ph->nexthdr = top_iph->protocol;
+ if (ph->padlen)
+ memset(ph + 1, IPOPT_NOP, ph->padlen);
top_iph->protocol = IPPROTO_BEETPH;
top_iph->ihl = sizeof(struct iphdr) / 4;
@@ -68,46 +70,45 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
int phlen = 0;
int optlen = 0;
- __u8 ph_nexthdr = 0, protocol = 0;
+ u8 ph_nexthdr = 0;
int err = -EINVAL;
- protocol = iph->protocol;
-
if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
- struct ip_beet_phdr *ph = (struct ip_beet_phdr*)(iph + 1);
+ struct ip_beet_phdr *ph;
if (!pskb_may_pull(skb, sizeof(*ph)))
goto out;
+ ph = (struct ip_beet_phdr *)(ipip_hdr(skb) + 1);
- phlen = ph->hdrlen * 8;
- optlen = phlen - ph->padlen - sizeof(*ph);
+ phlen = sizeof(*ph) + ph->padlen;
+ optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
if (optlen < 0 || optlen & 3 || optlen > 250)
goto out;
- if (!pskb_may_pull(skb, phlen))
+ if (!pskb_may_pull(skb, phlen + optlen))
goto out;
+ skb->len -= phlen + optlen;
ph_nexthdr = ph->nexthdr;
}
- skb_push(skb, sizeof(*iph) - phlen + optlen);
- memmove(skb->data, skb->nh.raw, sizeof(*iph));
- skb->nh.raw = skb->data;
+ skb_set_network_header(skb, phlen - sizeof(*iph));
+ memmove(skb_network_header(skb), iph, sizeof(*iph));
+ skb_set_transport_header(skb, phlen + optlen);
+ skb->data = skb_transport_header(skb);
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->ihl = (sizeof(*iph) + optlen) / 4;
- iph->tot_len = htons(skb->len);
+ iph->tot_len = htons(skb->len + iph->ihl * 4);
iph->daddr = x->sel.daddr.a4;
iph->saddr = x->sel.saddr.a4;
if (ph_nexthdr)
iph->protocol = ph_nexthdr;
- else
- iph->protocol = protocol;
iph->check = 0;
- iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+ iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
err = 0;
out:
return err;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 92676b7e4034..601047161ea6 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -23,16 +23,13 @@
*/
static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph;
- int ihl;
+ struct iphdr *iph = ip_hdr(skb);
+ int ihl = iph->ihl * 4;
- iph = skb->nh.iph;
- skb->h.ipiph = iph;
-
- ihl = iph->ihl * 4;
- skb->h.raw += ihl;
-
- skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
+ skb->transport_header = skb->network_header + ihl;
+ skb_push(skb, x->props.header_len);
+ skb_reset_network_header(skb);
+ memmove(skb_network_header(skb), iph, ihl);
return 0;
}
@@ -46,12 +43,15 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
- int ihl = skb->data - skb->h.raw;
+ int ihl = skb->data - skb_transport_header(skb);
- if (skb->h.raw != skb->nh.raw)
- skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
- skb->nh.iph->tot_len = htons(skb->len + ihl);
- skb->h.raw = skb->data;
+ if (skb->transport_header != skb->network_header) {
+ memmove(skb_transport_header(skb),
+ skb_network_header(skb), ihl);
+ skb->network_header = skb->transport_header;
+ }
+ ip_hdr(skb)->tot_len = htons(skb->len + ihl);
+ skb_reset_transport_header(skb);
return 0;
}
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ceb4376f572a..a2f2e6a5ec5d 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -16,8 +16,8 @@
static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
{
- struct iphdr *outer_iph = skb->nh.iph;
- struct iphdr *inner_iph = skb->h.ipiph;
+ struct iphdr *outer_iph = ip_hdr(skb);
+ struct iphdr *inner_iph = ipip_hdr(skb);
if (INET_ECN_is_ce(outer_iph->tos))
IP_ECN_set_ce(inner_iph);
@@ -26,7 +26,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
if (INET_ECN_is_ce(iph->tos))
- IP6_ECN_set_ce(skb->nh.ipv6h);
+ IP6_ECN_set_ce(ipv6_hdr(skb));
}
/* Add encapsulation header.
@@ -46,11 +46,12 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph, *top_iph;
int flags;
- iph = skb->nh.iph;
- skb->h.ipiph = iph;
+ iph = ip_hdr(skb);
+ skb->transport_header = skb->network_header;
- skb->nh.raw = skb_push(skb, x->props.header_len);
- top_iph = skb->nh.iph;
+ skb_push(skb, x->props.header_len);
+ skb_reset_network_header(skb);
+ top_iph = ip_hdr(skb);
top_iph->ihl = 5;
top_iph->version = 4;
@@ -90,10 +91,11 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
+ const unsigned char *old_mac;
int err = -EINVAL;
- switch(iph->protocol){
+ switch (iph->protocol){
case IPPROTO_IPIP:
break;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -111,10 +113,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
(err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
goto out;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (iph->protocol == IPPROTO_IPIP) {
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv4_copy_dscp(iph, skb->h.ipiph);
+ ipv4_copy_dscp(iph, ipip_hdr(skb));
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip_ecn_decapsulate(skb);
}
@@ -125,9 +127,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
}
#endif
- skb->mac.raw = memmove(skb->data - skb->mac_len,
- skb->mac.raw, skb->mac_len);
- skb->nh.raw = skb->data;
+ old_mac = skb_mac_header(skb);
+ skb_set_mac_header(skb, -skb->mac_len);
+ memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+ skb_reset_network_header(skb);
err = 0;
out:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 038ca160fe2c..44ef208a75cb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -22,14 +22,13 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
{
int mtu, ret = 0;
struct dst_entry *dst;
- struct iphdr *iph = skb->nh.iph;
if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
goto out;
IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
- if (!(iph->frag_off & htons(IP_DF)) || skb->local_df)
+ if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
goto out;
dst = skb->dst;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 5d51a2af34c1..4ff8ed30024f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -119,7 +119,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
unsigned short encap_family = xfrm[i]->props.family;
- switch(encap_family) {
+ switch (encap_family) {
case AF_INET:
fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
@@ -209,8 +209,8 @@ error:
static void
_decode_session4(struct sk_buff *skb, struct flowi *fl)
{
- struct iphdr *iph = skb->nh.iph;
- u8 *xprth = skb->nh.raw + iph->ihl*4;
+ struct iphdr *iph = ip_hdr(skb);
+ u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
memset(fl, 0, sizeof(struct flowi));
if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
@@ -263,7 +263,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
default:
fl->fl_ipsec_spi = 0;
break;
- };
+ }
}
fl->proto = iph->protocol;
fl->fl4_dst = iph->daddr;
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 3eef06454da9..568510304553 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -12,9 +12,8 @@
static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph;
+ struct iphdr *iph = ip_hdr(skb);
- iph = skb->nh.iph;
iph->tot_len = htons(skb->len);
ip_send_check(iph);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 79682efb14be..8e5d54f23b49 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -57,6 +57,16 @@ config IPV6_ROUTE_INFO
If unsure, say N.
+config IPV6_OPTIMISTIC_DAD
+ bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)"
+ depends on IPV6 && EXPERIMENTAL
+ ---help---
+ This is experimental support for optimistic Duplicate
+ Address Detection. It allows for autoconfigured addresses
+ to be used more quickly.
+
+ If unsure, say N.
+
config INET6_AH
tristate "IPv6: AH transformation"
depends on IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index d460017bb353..bb33309044c9 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,14 +7,15 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
- exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
- ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o
+ exthdrs.o sysctl_net_ipv6.o datagram.o \
+ ip6_flowlabel.o inet6_connection_sock.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
xfrm6_output.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+ipv6-$(CONFIG_PROC_FS) += proc.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a7fee6b27320..d02685c6bc69 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
#endif
#include <asm/uaccess.h>
+#include <asm/unaligned.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -172,6 +173,7 @@ struct ipv6_devconf ipv6_devconf __read_mostly = {
#endif
#endif
.proxy_ndp = 0,
+ .accept_source_route = 0, /* we do not accept RH0 by default. */
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -203,12 +205,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
#endif
#endif
.proxy_ndp = 0,
+ .accept_source_route = 0, /* we do not accept RH0 by default. */
};
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-#if 0
const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
-#endif
const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -244,6 +245,37 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
add_timer(&ifp->timer);
}
+static int snmp6_alloc_dev(struct inet6_dev *idev)
+{
+ int err = -ENOMEM;
+
+ if (!idev || !idev->dev)
+ return -EINVAL;
+
+ if (snmp_mib_init((void **)idev->stats.ipv6,
+ sizeof(struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
+ goto err_ip;
+ if (snmp_mib_init((void **)idev->stats.icmpv6,
+ sizeof(struct icmpv6_mib),
+ __alignof__(struct icmpv6_mib)) < 0)
+ goto err_icmp;
+
+ return 0;
+
+err_icmp:
+ snmp_mib_free((void **)idev->stats.ipv6);
+err_ip:
+ return err;
+}
+
+static int snmp6_free_dev(struct inet6_dev *idev)
+{
+ snmp_mib_free((void **)idev->stats.icmpv6);
+ snmp_mib_free((void **)idev->stats.ipv6);
+ return 0;
+}
+
/* Nobody refers to this device, we may destroy it. */
static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
@@ -269,6 +301,8 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
}
+EXPORT_SYMBOL(in6_dev_finish_destroy);
+
static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
{
struct inet6_dev *ndev;
@@ -342,6 +376,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
}
#endif
+ if (netif_running(dev) && netif_carrier_ok(dev))
+ ndev->if_flags |= IF_READY;
+
ipv6_mc_init_dev(ndev);
ndev->tstamp = jiffies;
#ifdef CONFIG_SYSCTL
@@ -412,7 +449,7 @@ static void addrconf_forward_change(void)
struct inet6_dev *idev;
read_lock(&dev_base_lock);
- for (dev=dev_base; dev; dev=dev->next) {
+ for_each_netdev(dev) {
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
@@ -523,6 +560,16 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
ifa->rt = rt;
+ /*
+ * part one of RFC 4429, section 3.3
+ * We should not configure an address as
+ * optimistic if we do not yet know the link
+ * layer address of our nexthop router
+ */
+
+ if (rt->rt6i_nexthop == NULL)
+ ifa->flags &= ~IFA_F_OPTIMISTIC;
+
ifa->idev = idev;
in6_dev_hold(idev);
/* For caller */
@@ -699,6 +746,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
int tmp_plen;
int ret = 0;
int max_addresses;
+ u32 addr_flags;
write_lock(&idev->lock);
if (ift) {
@@ -756,10 +804,17 @@ retry:
spin_unlock_bh(&ifp->lock);
write_unlock(&idev->lock);
+
+ addr_flags = IFA_F_TEMPORARY;
+ /* set in addrconf_prefix_rcv() */
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ addr_flags |= IFA_F_OPTIMISTIC;
+
ift = !max_addresses ||
ipv6_count_addresses(idev) < max_addresses ?
ipv6_add_addr(idev, &addr, tmp_plen,
- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : NULL;
+ ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+ addr_flags) : NULL;
if (!ift || IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
@@ -804,7 +859,7 @@ struct ipv6_saddr_score {
#define IPV6_SADDR_SCORE_LABEL 0x0020
#define IPV6_SADDR_SCORE_PRIVACY 0x0040
-static int inline ipv6_saddr_preferred(int type)
+static inline int ipv6_saddr_preferred(int type)
{
if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|
IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED))
@@ -813,7 +868,7 @@ static int inline ipv6_saddr_preferred(int type)
}
/* static matching label */
-static int inline ipv6_saddr_label(const struct in6_addr *addr, int type)
+static inline int ipv6_saddr_label(const struct in6_addr *addr, int type)
{
/*
* prefix (longest match) label
@@ -856,7 +911,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
read_lock(&dev_base_lock);
rcu_read_lock();
- for (dev = dev_base; dev; dev=dev->next) {
+ for_each_netdev(dev) {
struct inet6_dev *idev;
struct inet6_ifaddr *ifa;
@@ -891,13 +946,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
* - Tentative Address (RFC2462 section 5.4)
* - A tentative address is not considered
* "assigned to an interface" in the traditional
- * sense.
+ * sense, unless it is also flagged as optimistic.
* - Candidate Source Address (section 4)
* - In any case, anycast addresses, multicast
* addresses, and the unspecified address MUST
* NOT be included in a candidate set.
*/
- if (ifa->flags & IFA_F_TENTATIVE)
+ if ((ifa->flags & IFA_F_TENTATIVE) &&
+ (!(ifa->flags & IFA_F_OPTIMISTIC)))
continue;
if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
score.addr_type & IPV6_ADDR_MULTICAST)) {
@@ -956,15 +1012,17 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
}
}
- /* Rule 3: Avoid deprecated address */
+ /* Rule 3: Avoid deprecated and optimistic addresses */
if (hiscore.rule < 3) {
if (ipv6_saddr_preferred(hiscore.addr_type) ||
- !(ifa_result->flags & IFA_F_DEPRECATED))
+ (((ifa_result->flags &
+ (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0)))
hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
hiscore.rule++;
}
if (ipv6_saddr_preferred(score.addr_type) ||
- !(ifa->flags & IFA_F_DEPRECATED)) {
+ (((ifa->flags & /* the candidate's own flags, not ifa_result's */
+ (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
score.rule = 3;
@@ -1102,8 +1160,10 @@ int ipv6_get_saddr(struct dst_entry *dst,
return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
}
+EXPORT_SYMBOL(ipv6_get_saddr);
-int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
+int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
+ unsigned char banned_flags)
{
struct inet6_dev *idev;
int err = -EADDRNOTAVAIL;
@@ -1114,7 +1174,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
read_lock_bh(&idev->lock);
for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
- if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+ if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
ipv6_addr_copy(addr, &ifp->addr);
err = 0;
break;
@@ -1156,6 +1216,8 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
return ifp != NULL;
}
+EXPORT_SYMBOL(ipv6_chk_addr);
+
static
int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
{
@@ -1664,6 +1726,13 @@ ok:
if (ifp == NULL && valid_lft) {
int max_addresses = in6_dev->cnf.max_addresses;
+ u32 addr_flags = 0;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (in6_dev->cnf.optimistic_dad &&
+ !ipv6_devconf.forwarding)
+ addr_flags = IFA_F_OPTIMISTIC;
+#endif
/* Do not allow to create too much of autoconfigured
* addresses; this would be too easy way to crash kernel.
@@ -1671,7 +1740,8 @@ ok:
if (!max_addresses ||
ipv6_count_addresses(in6_dev) < max_addresses)
ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
- addr_type&IPV6_ADDR_SCOPE_MASK, 0);
+ addr_type&IPV6_ADDR_SCOPE_MASK,
+ addr_flags);
if (!ifp || IS_ERR(ifp)) {
in6_dev_put(in6_dev);
@@ -1879,6 +1949,11 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
jiffies_to_clock_t(valid_lft * HZ), flags);
+ /*
+ * Note that section 3.1 of RFC 4429 indicates
+ * that the Optimistic flag should not be set for
+ * manually configured addresses
+ */
addrconf_dad_start(ifp, 0);
in6_ifa_put(ifp);
addrconf_verify(0);
@@ -1989,7 +2064,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
return;
}
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
struct in_device * in_dev = __in_dev_get_rtnl(dev);
if (in_dev && (dev->flags & IFF_UP)) {
struct in_ifaddr * ifa;
@@ -2055,8 +2130,16 @@ static void init_loopback(struct net_device *dev)
static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
{
struct inet6_ifaddr * ifp;
+ u32 addr_flags = IFA_F_PERMANENT;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (idev->cnf.optimistic_dad &&
+ !ipv6_devconf.forwarding)
+ addr_flags |= IFA_F_OPTIMISTIC;
+#endif
- ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, IFA_F_PERMANENT);
+
+ ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
if (!IS_ERR(ifp)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
addrconf_dad_start(ifp, 0);
@@ -2124,7 +2207,7 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
{
struct in6_addr lladdr;
- if (!ipv6_get_lladdr(link_dev, &lladdr)) {
+ if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
addrconf_add_linklocal(idev, &lladdr);
return 0;
}
@@ -2142,7 +2225,7 @@ static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
return;
}
/* then try to inherit it from any device */
- for (link_dev = dev_base; link_dev; link_dev = link_dev->next) {
+ for_each_netdev(link_dev) {
if (!ipv6_inherit_linklocal(idev, link_dev))
return;
}
@@ -2235,7 +2318,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
default:
addrconf_dev_config(dev);
break;
- };
+ }
if (idev) {
if (run_pending)
addrconf_dad_run(idev);
@@ -2276,8 +2359,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
case NETDEV_CHANGENAME:
-#ifdef CONFIG_SYSCTL
if (idev) {
+ snmp6_unregister_dev(idev);
+#ifdef CONFIG_SYSCTL
addrconf_sysctl_unregister(&idev->cnf);
neigh_sysctl_unregister(idev->nd_parms);
neigh_sysctl_register(dev, idev->nd_parms,
@@ -2285,10 +2369,11 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
&ndisc_ifinfo_sysctl_change,
NULL);
addrconf_sysctl_register(idev, &idev->cnf);
- }
#endif
+ snmp6_register_dev(idev);
+ }
break;
- };
+ }
return NOTIFY_OK;
}
@@ -2469,7 +2554,11 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
unsigned long rand_num;
struct inet6_dev *idev = ifp->idev;
- rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ rand_num = 0;
+ else
+ rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+
ifp->probes = idev->cnf.dad_transmits;
addrconf_mod_timer(ifp, AC_DAD, rand_num);
}
@@ -2491,7 +2580,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
- ifp->flags &= ~IFA_F_TENTATIVE;
+ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
spin_unlock_bh(&ifp->lock);
read_unlock_bh(&idev->lock);
@@ -2511,6 +2600,14 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
addrconf_dad_stop(ifp);
return;
}
+
+ /*
+ * Optimistic nodes can start receiving
+ * frames right away
+ */
+ if(ifp->flags & IFA_F_OPTIMISTIC)
+ ip6_ins_rt(ifp->rt);
+
addrconf_dad_kick(ifp);
spin_unlock_bh(&ifp->lock);
out:
@@ -2535,7 +2632,7 @@ static void addrconf_dad_timer(unsigned long data)
* DAD was successful
*/
- ifp->flags &= ~IFA_F_TENTATIVE;
+ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
spin_unlock_bh(&ifp->lock);
read_unlock_bh(&idev->lock);
@@ -3159,16 +3256,16 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
s_idx = cb->args[0];
s_ip_idx = ip_idx = cb->args[1];
- read_lock(&dev_base_lock);
- for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if (idx > s_idx)
s_ip_idx = 0;
ip_idx = 0;
if ((idev = in6_dev_get(dev)) == NULL)
- continue;
+ goto cont;
read_lock_bh(&idev->lock);
switch (type) {
case UNICAST_ADDR:
@@ -3215,13 +3312,14 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
}
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
+cont:
+ idx++;
}
done:
if (err <= 0) {
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
}
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
cb->args[1] = ip_idx;
return skb->len;
@@ -3318,7 +3416,7 @@ errout:
rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
}
-static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
+static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
__s32 *array, int bytes)
{
BUG_ON(bytes < (DEVCONF_MAX * 4));
@@ -3353,6 +3451,10 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
#endif
#endif
array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
+ array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+#endif
}
static inline size_t inet6_if_nlmsg_size(void)
@@ -3366,14 +3468,44 @@ static inline size_t inet6_if_nlmsg_size(void)
nla_total_size(4) /* IFLA_INET6_FLAGS */
+ nla_total_size(sizeof(struct ifla_cacheinfo))
+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+ + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
);
}
+static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items,
+ int bytes)
+{
+ int i;
+ int pad = bytes - sizeof(u64) * items;
+ BUG_ON(pad < 0);
+
+ /* Use put_unaligned() because stats may not be aligned for u64. */
+ put_unaligned(items, &stats[0]);
+ for (i = 1; i < items; i++)
+ put_unaligned(snmp_fold_field(mib, i), &stats[i]);
+
+ memset(&stats[items], 0, pad);
+}
+
+static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
+ int bytes)
+{
+ switch(attrtype) {
+ case IFLA_INET6_STATS:
+ __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+ break;
+ case IFLA_INET6_ICMP6STATS:
+ __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
+ break;
+ }
+}
+
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
u32 pid, u32 seq, int event, unsigned int flags)
{
struct net_device *dev = idev->dev;
- struct nlattr *conf;
+ struct nlattr *nla;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
void *protoinfo;
@@ -3413,12 +3545,22 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
ci.retrans_time = idev->nd_parms->retrans_time;
NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
- conf = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
- if (conf == NULL)
+ nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+ if (nla == NULL)
+ goto nla_put_failure;
+ ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
+
+ /* XXX - MC not implemented */
+
+ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
goto nla_put_failure;
- ipv6_store_devconf(&idev->cnf, nla_data(conf), nla_len(conf));
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
- /* XXX - Statistics/MC not implemented */
+ nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
nla_nest_end(skb, protoinfo);
return nlmsg_end(skb, nlh);
@@ -3436,16 +3578,19 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct inet6_dev *idev;
read_lock(&dev_base_lock);
- for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if ((idev = in6_dev_get(dev)) == NULL)
- continue;
+ goto cont;
err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
in6_dev_put(idev);
if (err <= 0)
break;
+cont:
+ idx++;
}
read_unlock(&dev_base_lock);
cb->args[0] = idx;
@@ -3544,30 +3689,20 @@ errout:
rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
}
-static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
- [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, },
- [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, },
- [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, },
- [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr,
- .dumpit = inet6_dump_ifaddr, },
- [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
- [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, },
- [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, },
- [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, },
- [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute,
- .dumpit = inet6_dump_fib, },
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, },
-#endif
-};
-
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
switch (event) {
case RTM_NEWADDR:
- ip6_ins_rt(ifp->rt);
+ /*
+ * If the address was optimistic
+ * we inserted the route at the start of
+ * our DAD process, so we don't need
+ * to do it again
+ */
+ if (!(ifp->rt->rt6i_node))
+ ip6_ins_rt(ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
break;
@@ -3881,6 +4016,25 @@ static struct addrconf_sysctl_table
.proc_handler = &proc_dointvec,
},
{
+ .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE,
+ .procname = "accept_source_route",
+ .data = &ipv6_devconf.accept_source_route,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "optimistic_dad",
+ .data = &ipv6_devconf.optimistic_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+
+ },
+#endif
+ {
.ctl_name = 0, /* sentinel */
}
},
@@ -4007,11 +4161,15 @@ int register_inet6addr_notifier(struct notifier_block *nb)
return atomic_notifier_chain_register(&inet6addr_chain, nb);
}
+EXPORT_SYMBOL(register_inet6addr_notifier);
+
int unregister_inet6addr_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
}
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+
/*
* Init / cleanup code
*/
@@ -4050,7 +4208,18 @@ int __init addrconf_init(void)
register_netdevice_notifier(&ipv6_dev_notf);
addrconf_verify(0);
- rtnetlink_links[PF_INET6] = inet6_rtnetlink_table;
+
+ err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
+ if (err < 0)
+ goto errout;
+
+ /* Only the first call to __rtnl_register can fail */
+ __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
+ __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
+ __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
+ __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
+ __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
+
#ifdef CONFIG_SYSCTL
addrconf_sysctl.sysctl_header =
register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
@@ -4058,6 +4227,10 @@ int __init addrconf_init(void)
#endif
return 0;
+errout:
+ unregister_netdevice_notifier(&ipv6_dev_notf);
+
+ return err;
}
void __exit addrconf_cleanup(void)
@@ -4069,7 +4242,6 @@ void __exit addrconf_cleanup(void)
unregister_netdevice_notifier(&ipv6_dev_notf);
- rtnetlink_links[PF_INET6] = NULL;
#ifdef CONFIG_SYSCTL
addrconf_sysctl_unregister(&ipv6_devconf_dflt);
addrconf_sysctl_unregister(&ipv6_devconf);
@@ -4081,7 +4253,7 @@ void __exit addrconf_cleanup(void)
* clean dev list.
*/
- for (dev=dev_base; dev; dev=dev->next) {
+ for_each_netdev(dev) {
if ((idev = __in6_dev_get(dev)) == NULL)
continue;
addrconf_ifdown(dev, 1);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5cac14a5c778..6dd377253cf7 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -42,7 +42,6 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>
-#include <linux/smp_lock.h>
#include <linux/netfilter_ipv6.h>
#include <net/ip.h>
@@ -98,6 +97,11 @@ static int inet6_create(struct socket *sock, int protocol)
int try_loading_module = 0;
int err;
+ if (sock->type != SOCK_RAW &&
+ sock->type != SOCK_DGRAM &&
+ !inet_ehash_secret)
+ build_ehash_secret();
+
/* Look for the requested type/protocol pair. */
answer = NULL;
lookup_protocol:
@@ -349,6 +353,8 @@ out:
return err;
}
+EXPORT_SYMBOL(inet6_bind);
+
int inet6_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -365,6 +371,8 @@ int inet6_release(struct socket *sock)
return inet_release(sock);
}
+EXPORT_SYMBOL(inet6_release);
+
int inet6_destroy_sock(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -428,6 +436,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
return(0);
}
+EXPORT_SYMBOL(inet6_getname);
+
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
@@ -437,6 +447,9 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP:
return sock_get_timestamp(sk, (struct timeval __user *)arg);
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, (struct timespec __user *)arg);
+
case SIOCADDRT:
case SIOCDELRT:
@@ -457,6 +470,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return(0);
}
+EXPORT_SYMBOL(inet6_ioctl);
+
const struct proto_ops inet6_stream_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
@@ -603,6 +618,8 @@ out_illegal:
goto out;
}
+EXPORT_SYMBOL(inet6_register_protosw);
+
void
inet6_unregister_protosw(struct inet_protosw *p)
{
@@ -619,6 +636,8 @@ inet6_unregister_protosw(struct inet_protosw *p)
}
}
+EXPORT_SYMBOL(inet6_unregister_protosw);
+
int inet6_sk_rebuild_header(struct sock *sk)
{
int err;
@@ -678,7 +697,8 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
if (np->rxopt.all) {
if ((opt->hop && (np->rxopt.bits.hopopts ||
np->rxopt.bits.ohopopts)) ||
- ((IPV6_FLOWINFO_MASK & *(__be32*)skb->nh.raw) &&
+ ((IPV6_FLOWINFO_MASK &
+ *(__be32 *)skb_network_header(skb)) &&
np->rxopt.bits.rxflow) ||
(opt->srcrt && (np->rxopt.bits.srcrt ||
np->rxopt.bits.osrcrt)) ||
@@ -691,61 +711,28 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
-int
-snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
-{
- if (ptr == NULL)
- return -EINVAL;
-
- ptr[0] = __alloc_percpu(mibsize);
- if (!ptr[0])
- goto err0;
-
- ptr[1] = __alloc_percpu(mibsize);
- if (!ptr[1])
- goto err1;
-
- return 0;
-
-err1:
- free_percpu(ptr[0]);
- ptr[0] = NULL;
-err0:
- return -ENOMEM;
-}
-
-void
-snmp6_mib_free(void *ptr[2])
-{
- if (ptr == NULL)
- return;
- free_percpu(ptr[0]);
- free_percpu(ptr[1]);
- ptr[0] = ptr[1] = NULL;
-}
-
static int __init init_ipv6_mibs(void)
{
- if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
goto err_ip_mib;
- if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
- __alignof__(struct icmpv6_mib)) < 0)
+ if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+ __alignof__(struct icmpv6_mib)) < 0)
goto err_icmp_mib;
- if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
goto err_udp_mib;
- if (snmp6_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
goto err_udplite_mib;
return 0;
err_udplite_mib:
- snmp6_mib_free((void **)udp_stats_in6);
+ snmp_mib_free((void **)udp_stats_in6);
err_udp_mib:
- snmp6_mib_free((void **)icmpv6_statistics);
+ snmp_mib_free((void **)icmpv6_statistics);
err_icmp_mib:
- snmp6_mib_free((void **)ipv6_statistics);
+ snmp_mib_free((void **)ipv6_statistics);
err_ip_mib:
return -ENOMEM;
@@ -753,10 +740,10 @@ err_ip_mib:
static void cleanup_ipv6_mibs(void)
{
- snmp6_mib_free((void **)ipv6_statistics);
- snmp6_mib_free((void **)icmpv6_statistics);
- snmp6_mib_free((void **)udp_stats_in6);
- snmp6_mib_free((void **)udplite_stats_in6);
+ snmp_mib_free((void **)ipv6_statistics);
+ snmp_mib_free((void **)icmpv6_statistics);
+ snmp_mib_free((void **)udp_stats_in6);
+ snmp_mib_free((void **)udplite_stats_in6);
}
static int __init inet6_init(void)
@@ -929,6 +916,8 @@ static void __exit inet6_exit(void)
{
/* First of all disallow new sockets creation. */
sock_unregister(PF_INET6);
+ /* Disallow any further netlink messages */
+ rtnl_unregister_all(PF_INET6);
/* Cleanup code parts. */
ipv6_packet_cleanup();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index dc68b7269c3c..b696c8401200 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -238,8 +238,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph = (struct ipv6hdr *)skb->data;
top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
- nexthdr = *skb->nh.raw;
- *skb->nh.raw = IPPROTO_AH;
+ nexthdr = *skb_network_header(skb);
+ *skb_network_header(skb) = IPPROTO_AH;
/* When there are no extension headers, we only need to save the first
* 8 bytes of the base IP header.
@@ -247,7 +247,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
memcpy(tmp_base, top_iph, sizeof(tmp_base));
tmp_ext = NULL;
- extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
+ extlen = skb_transport_offset(skb) - sizeof(struct ipv6hdr); /* transport header minus (skb->data + base header), as in the removed line */
if (extlen) {
extlen += sizeof(*tmp_ext);
tmp_ext = kmalloc(extlen, GFP_ATOMIC);
@@ -268,7 +268,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
goto error_free_iph;
}
- ah = (struct ip_auth_hdr *)skb->h.raw;
+ ah = (struct ip_auth_hdr *)skb_transport_header(skb);
ah->nexthdr = nexthdr;
top_iph->priority = 0;
@@ -316,8 +316,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
*
* To erase AH:
* Keeping copy of cleared headers. After AH processing,
- * Moving the pointer of skb->nh.raw by using skb_pull as long as AH
- * header length. Then copy back the copy as long as hdr_len
+ * Moving the pointer of skb->network_header by using skb_pull as long
+ * as AH header length. Then copy back the copy as long as hdr_len
* If destination header following AH exists, copy it into after [Ext2].
*
* |<>|[IPv6][Ext1][Ext2][Dest][Payload]
@@ -325,6 +325,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
*/
struct ipv6_auth_hdr *ah;
+ struct ipv6hdr *ip6h;
struct ah_data *ahp;
unsigned char *tmp_hdr = NULL;
u16 hdr_len;
@@ -341,7 +342,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto out;
- hdr_len = skb->data - skb->nh.raw;
+ hdr_len = skb->data - skb_network_header(skb);
ah = (struct ipv6_auth_hdr*)skb->data;
ahp = x->data;
nexthdr = ah->nexthdr;
@@ -354,16 +355,17 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, ah_hlen))
goto out;
- tmp_hdr = kmemdup(skb->nh.raw, hdr_len, GFP_ATOMIC);
+ tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC);
if (!tmp_hdr)
goto out;
- if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
+ ip6h = ipv6_hdr(skb);
+ if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
goto free_out;
- skb->nh.ipv6h->priority = 0;
- skb->nh.ipv6h->flow_lbl[0] = 0;
- skb->nh.ipv6h->flow_lbl[1] = 0;
- skb->nh.ipv6h->flow_lbl[2] = 0;
- skb->nh.ipv6h->hop_limit = 0;
+ ip6h->priority = 0;
+ ip6h->flow_lbl[0] = 0;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+ ip6h->hop_limit = 0;
{
u8 auth_data[MAX_AH_AUTH_LEN];
@@ -382,7 +384,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
}
}
- skb->h.raw = memcpy(skb->nh.raw += ah_hlen, tmp_hdr, hdr_len);
+ skb->network_header += ah_hlen;
+ memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
+ skb->transport_header = skb->network_header;
__skb_pull(skb, ah_hlen + hdr_len);
kfree(tmp_hdr);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 09117d63256f..9b81264eb78f 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -423,14 +423,18 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
*/
int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
{
+ int found = 0;
+
if (dev)
return ipv6_chk_acast_dev(dev, addr);
read_lock(&dev_base_lock);
- for (dev=dev_base; dev; dev=dev->next)
- if (ipv6_chk_acast_dev(dev, addr))
+ for_each_netdev(dev)
+ if (ipv6_chk_acast_dev(dev, addr)) {
+ found = 1;
break;
+ }
read_unlock(&dev_base_lock);
- return dev != 0;
+ return found;
}
@@ -447,9 +451,8 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
struct ifacaddr6 *im = NULL;
struct ac6_iter_state *state = ac6_seq_private(seq);
- for (state->dev = dev_base, state->idev = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->idev = NULL;
+ for_each_netdev(state->dev) {
struct inet6_dev *idev;
idev = in6_dev_get(state->dev);
if (!idev)
@@ -476,7 +479,7 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im
read_unlock_bh(&state->idev->lock);
in6_dev_put(state->idev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->idev = NULL;
break;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 3b4e8dcf4c86..403eee66b9c5 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -209,7 +209,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
+ struct icmp6hdr *icmph = icmp6_hdr(skb);
struct sock_exterr_skb *serr;
if (!np->recverr)
@@ -227,11 +227,12 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr->ee.ee_pad = 0;
serr->ee.ee_info = info;
serr->ee.ee_data = 0;
- serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
+ serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
+ skb_network_header(skb);
serr->port = port;
- skb->h.raw = payload;
__skb_pull(skb, payload - skb->data);
+ skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb))
kfree_skb(skb);
@@ -251,8 +252,9 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
if (!skb)
return;
- iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
- skb->nh.ipv6h = iph;
+ skb_put(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ iph = ipv6_hdr(skb);
ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
serr = SKB_EXT_ERR(skb);
@@ -263,11 +265,11 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
serr->ee.ee_pad = 0;
serr->ee.ee_info = info;
serr->ee.ee_data = 0;
- serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+ serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
serr->port = fl->fl_ip_dport;
- skb->h.raw = skb->tail;
- __skb_pull(skb, skb->tail - skb->data);
+ __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+ skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb))
kfree_skb(skb);
@@ -309,21 +311,24 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin = (struct sockaddr_in6 *)msg->msg_name;
if (sin) {
+ const unsigned char *nh = skb_network_header(skb);
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_port = serr->port;
sin->sin6_scope_id = 0;
if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
ipv6_addr_copy(&sin->sin6_addr,
- (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+ (struct in6_addr *)(nh + serr->addr_offset));
if (np->sndflow)
- sin->sin6_flowinfo = *(__be32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
+ sin->sin6_flowinfo =
+ (*(__be32 *)(nh + serr->addr_offset - 24) &
+ IPV6_FLOWINFO_MASK);
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin->sin6_scope_id = IP6CB(skb)->iif;
} else {
ipv6_addr_set(&sin->sin6_addr, 0, 0,
htonl(0xffff),
- *(__be32*)(skb->nh.raw + serr->addr_offset));
+ *(__be32 *)(nh + serr->addr_offset));
}
}
@@ -335,7 +340,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin->sin6_flowinfo = 0;
sin->sin6_scope_id = 0;
if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
- ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
if (np->rxopt.all)
datagram_recv_ctl(sk, msg, skb);
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -344,8 +349,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
struct inet_sock *inet = inet_sk(sk);
ipv6_addr_set(&sin->sin6_addr, 0, 0,
- htonl(0xffff),
- skb->nh.iph->saddr);
+ htonl(0xffff), ip_hdr(skb)->saddr);
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
}
@@ -381,33 +385,34 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet6_skb_parm *opt = IP6CB(skb);
+ unsigned char *nh = skb_network_header(skb);
if (np->rxopt.bits.rxinfo) {
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = opt->iif;
- ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
- int hlim = skb->nh.ipv6h->hop_limit;
+ int hlim = ipv6_hdr(skb)->hop_limit;
put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.rxtclass) {
- int tclass = (ntohl(*(__be32 *)skb->nh.ipv6h) >> 20) & 0xff;
+ int tclass = (ntohl(*(__be32 *)ipv6_hdr(skb)) >> 20) & 0xff;
put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
}
- if (np->rxopt.bits.rxflow && (*(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
- __be32 flowinfo = *(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
+ if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
+ __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
}
/* HbH is allowed only once */
if (np->rxopt.bits.hopopts && opt->hop) {
- u8 *ptr = skb->nh.raw + opt->hop;
+ u8 *ptr = nh + opt->hop;
put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
}
@@ -423,11 +428,11 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
* IPV6_RECVDSTOPTS is more generic. --yoshfuji
*/
unsigned int off = sizeof(struct ipv6hdr);
- u8 nexthdr = skb->nh.ipv6h->nexthdr;
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
while (off <= opt->lastopt) {
unsigned len;
- u8 *ptr = skb->nh.raw + off;
+ u8 *ptr = nh + off;
switch(nexthdr) {
case IPPROTO_DSTOPTS:
@@ -461,27 +466,27 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = opt->iif;
- ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
- int hlim = skb->nh.ipv6h->hop_limit;
+ int hlim = ipv6_hdr(skb)->hop_limit;
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.ohopopts && opt->hop) {
- u8 *ptr = skb->nh.raw + opt->hop;
+ u8 *ptr = nh + opt->hop;
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
}
if (np->rxopt.bits.odstopts && opt->dst0) {
- u8 *ptr = skb->nh.raw + opt->dst0;
+ u8 *ptr = nh + opt->dst0;
put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
}
if (np->rxopt.bits.osrcrt && opt->srcrt) {
- struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+ struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
}
if (np->rxopt.bits.odstopts && opt->dst1) {
- u8 *ptr = skb->nh.raw + opt->dst1;
+ u8 *ptr = nh + opt->dst1;
put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
}
return 0;
@@ -718,7 +723,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
cmsg->cmsg_type);
err = -EINVAL;
break;
- };
+ }
}
exit_f:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 363e63ffecca..7107bb7e2e62 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -42,21 +42,19 @@
static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
- int hdr_len;
struct ipv6hdr *top_iph;
struct ipv6_esp_hdr *esph;
struct crypto_blkcipher *tfm;
struct blkcipher_desc desc;
- struct esp_data *esp;
struct sk_buff *trailer;
int blksize;
int clen;
int alen;
int nfrags;
-
- esp = x->data;
- hdr_len = skb->h.raw - skb->data +
- sizeof(*esph) + esp->conf.ivlen;
+ u8 *tail;
+ struct esp_data *esp = x->data;
+ int hdr_len = (skb_transport_offset(skb) +
+ sizeof(*esph) + esp->conf.ivlen);
/* Strip IP+ESP header. */
__skb_pull(skb, hdr_len);
@@ -81,19 +79,20 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
}
/* Fill padding... */
+ tail = skb_tail_pointer(trailer);
do {
int i;
for (i=0; i<clen-skb->len - 2; i++)
- *(u8*)(trailer->tail + i) = i+1;
+ tail[i] = i + 1;
} while (0);
- *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+ tail[clen-skb->len - 2] = (clen - skb->len) - 2;
pskb_put(skb, trailer, clen - skb->len);
top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
- esph = (struct ipv6_esp_hdr *)skb->h.raw;
+ esph = (struct ipv6_esp_hdr *)skb_transport_header(skb);
top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
- *(u8*)(trailer->tail - 1) = *skb->nh.raw;
- *skb->nh.raw = IPPROTO_ESP;
+ *(skb_tail_pointer(trailer) - 1) = *skb_network_header(skb);
+ *skb_network_header(skb) = IPPROTO_ESP;
esph->spi = x->id.spi;
esph->seq_no = htonl(++x->replay.oseq);
@@ -150,8 +149,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
int alen = esp->auth.icv_trunc_len;
int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
-
- int hdr_len = skb->h.raw - skb->nh.raw;
+ int hdr_len = skb_network_header_len(skb);
int nfrags;
int ret = 0;
@@ -191,7 +189,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
esph = (struct ipv6_esp_hdr*)skb->data;
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
/* Get ivec. This can be wrong, check against another impls. */
if (esp->conf.ivlen)
@@ -231,28 +229,30 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
ret = nexthdr[1];
}
- skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - hdr_len;
-
+ __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+ skb_set_transport_header(skb, -hdr_len);
out:
return ret;
}
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
+ u32 align = max_t(u32, blksize, esp->conf.padlen);
+ u32 rem;
+
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ rem = mtu & (align - 1);
+ mtu &= ~(align - 1);
- if (x->props.mode == XFRM_MODE_TUNNEL) {
- mtu = ALIGN(mtu + 2, blksize);
- } else {
- /* The worst case. */
+ if (x->props.mode != XFRM_MODE_TUNNEL) {
u32 padsize = ((blksize - 1) & 7) + 1;
- mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
+ mtu -= blksize - padsize;
+ mtu += min_t(u32, blksize - padsize, rem);
}
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+ return mtu - 2;
}
static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -382,7 +382,7 @@ static struct xfrm_type esp6_type =
.proto = IPPROTO_ESP,
.init_state = esp6_init_state,
.destructor = esp6_destroy,
- .get_max_size = esp6_get_max_size,
+ .get_mtu = esp6_get_mtu,
.input = esp6_input,
.output = esp6_output,
.hdr_offset = xfrm6_find_1stfragopt,
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 28e0c6568272..6d8e4ac7bdad 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -50,13 +50,14 @@
int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
{
- int packet_len = skb->tail - skb->nh.raw;
+ const unsigned char *nh = skb_network_header(skb);
+ int packet_len = skb->tail - skb->network_header;
struct ipv6_opt_hdr *hdr;
int len;
if (offset + 2 > packet_len)
goto bad;
- hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ hdr = (struct ipv6_opt_hdr *)(nh + offset);
len = ((hdr->hdrlen + 1) << 3);
if (offset + len > packet_len)
@@ -66,7 +67,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
len -= 2;
while (len > 0) {
- int opttype = skb->nh.raw[offset];
+ int opttype = nh[offset];
int optlen;
if (opttype == type)
@@ -77,7 +78,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
optlen = 1;
break;
default:
- optlen = skb->nh.raw[offset + 1] + 2;
+ optlen = nh[offset + 1] + 2;
if (optlen > len)
goto bad;
break;
@@ -113,7 +114,7 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
{
struct sk_buff *skb = *skbp;
- switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
+ switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
case 0: /* ignore */
return 1;
@@ -124,12 +125,12 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
/* Actually, it is redundant check. icmp_send
will recheck in any case.
*/
- if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
break;
case 2: /* send ICMP PARM PROB regardless and drop packet */
icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
return 0;
- };
+ }
kfree_skb(skb);
return 0;
@@ -141,19 +142,20 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
{
struct sk_buff *skb = *skbp;
struct tlvtype_proc *curr;
- int off = skb->h.raw - skb->nh.raw;
- int len = ((skb->h.raw[1]+1)<<3);
+ const unsigned char *nh = skb_network_header(skb);
+ int off = skb_network_header_len(skb);
+ int len = (skb_transport_header(skb)[1] + 1) << 3;
- if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
+ if (skb_transport_offset(skb) + len > skb_headlen(skb))
goto bad;
off += 2;
len -= 2;
while (len > 0) {
- int optlen = skb->nh.raw[off+1]+2;
+ int optlen = nh[off + 1] + 2;
- switch (skb->nh.raw[off]) {
+ switch (nh[off]) {
case IPV6_TLV_PAD0:
optlen = 1;
break;
@@ -165,7 +167,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
if (optlen > len)
goto bad;
for (curr=procs; curr->type >= 0; curr++) {
- if (curr->type == skb->nh.raw[off]) {
+ if (curr->type == nh[off]) {
/* type specific length/alignment
checks will be performed in the
func(). */
@@ -200,7 +202,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
struct sk_buff *skb = *skbp;
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
- struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct in6_addr tmp_addr;
int ret;
@@ -211,7 +213,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
opt->dsthao = opt->dst1;
opt->dst1 = 0;
- hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
+ hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
if (hao->length != 16) {
LIMIT_NETDEBUG(
@@ -244,8 +246,9 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
/* update all variable using below by copied skbuff */
*skbp = skb = skb2;
- hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
- ipv6h = (struct ipv6hdr *)skb2->nh.raw;
+ hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+ optoff);
+ ipv6h = ipv6_hdr(skb2);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -255,7 +258,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
ipv6_addr_copy(&hao->addr, &tmp_addr);
- if (skb->tstamp.off_sec == 0)
+ if (skb->tstamp.tv64 == 0)
__net_timestamp(skb);
return 1;
@@ -285,16 +288,16 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
#endif
struct dst_entry *dst;
- if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
- !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+ !pskb_may_pull(skb, (skb_transport_offset(skb) +
+ ((skb_transport_header(skb)[1] + 1) << 3)))) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
- opt->lastopt = skb->h.raw - skb->nh.raw;
- opt->dst1 = skb->h.raw - skb->nh.raw;
+ opt->lastopt = opt->dst1 = skb_network_header_len(skb);
#ifdef CONFIG_IPV6_MIP6
dstbuf = opt->dst1;
#endif
@@ -303,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
dst_release(dst);
skb = *skbp;
- skb->h.raw += ((skb->h.raw[1]+1)<<3);
+ skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
#ifdef CONFIG_IPV6_MIP6
opt->nhoff = dstbuf;
@@ -362,22 +365,58 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
struct inet6_skb_parm *opt = IP6CB(skb);
struct in6_addr *addr = NULL;
struct in6_addr daddr;
+ struct inet6_dev *idev;
int n, i;
-
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
+ int accept_source_route = ipv6_devconf.accept_source_route;
+
+ if (accept_source_route < 0 ||
+ ((idev = in6_dev_get(skb->dev)) == NULL)) {
+ kfree_skb(skb);
+ return -1;
+ }
+ if (idev->cnf.accept_source_route < 0) {
+ in6_dev_put(idev);
+ kfree_skb(skb);
+ return -1;
+ }
+
+ if (accept_source_route > idev->cnf.accept_source_route)
+ accept_source_route = idev->cnf.accept_source_route;
- if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
- !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+ in6_dev_put(idev);
+
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+ !pskb_may_pull(skb, (skb_transport_offset(skb) +
+ ((skb_transport_header(skb)[1] + 1) << 3)))) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
- hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+ hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
- if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
+ switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+ case IPV6_SRCRT_TYPE_2:
+ break;
+#endif
+ case IPV6_SRCRT_TYPE_0:
+ if (accept_source_route > 0)
+ break;
+ kfree_skb(skb);
+ return -1;
+ default:
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ (&hdr->type) - skb_network_header(skb));
+ return -1;
+ }
+
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
skb->pkt_type != PACKET_HOST) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INADDRERRORS);
@@ -405,12 +444,11 @@ looped_back:
break;
}
- opt->lastopt = skb->h.raw - skb->nh.raw;
- opt->srcrt = skb->h.raw - skb->nh.raw;
- skb->h.raw += (hdr->hdrlen + 1) << 3;
+ opt->lastopt = opt->srcrt = skb_network_header_len(skb);
+ skb->transport_header += (hdr->hdrlen + 1) << 3;
opt->dst0 = opt->dst1;
opt->dst1 = 0;
- opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
+ opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
return 1;
}
@@ -419,7 +457,9 @@ looped_back:
if (hdr->hdrlen & 0x01) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ ((&hdr->hdrlen) -
+ skb_network_header(skb)));
return -1;
}
break;
@@ -434,11 +474,6 @@ looped_back:
}
break;
#endif
- default:
- IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
- IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
- return -1;
}
/*
@@ -451,7 +486,9 @@ looped_back:
if (hdr->segments_left > n) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ ((&hdr->segments_left) -
+ skb_network_header(skb)));
return -1;
}
@@ -470,7 +507,7 @@ looped_back:
kfree_skb(skb);
*skbp = skb = skb2;
opt = IP6CB(skb2);
- hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
+ hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -486,7 +523,7 @@ looped_back:
#ifdef CONFIG_IPV6_MIP6
case IPV6_SRCRT_TYPE_2:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
- (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+ (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
IPPROTO_ROUTING) < 0) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INADDRERRORS);
@@ -513,19 +550,19 @@ looped_back:
}
ipv6_addr_copy(&daddr, addr);
- ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
- ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
+ ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
+ ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
dst_release(xchg(&skb->dst, NULL));
ip6_route_input(skb);
if (skb->dst->error) {
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
dst_input(skb);
return -1;
}
if (skb->dst->dev->flags&IFF_LOOPBACK) {
- if (skb->nh.ipv6h->hop_limit <= 1) {
+ if (ipv6_hdr(skb)->hop_limit <= 1) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
@@ -533,11 +570,11 @@ looped_back:
kfree_skb(skb);
return -1;
}
- skb->nh.ipv6h->hop_limit--;
+ ipv6_hdr(skb)->hop_limit--;
goto looped_back;
}
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
dst_input(skb);
return -1;
}
@@ -628,13 +665,14 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
{
struct sk_buff *skb = *skbp;
+ const unsigned char *nh = skb_network_header(skb);
- if (skb->nh.raw[optoff+1] == 2) {
+ if (nh[optoff + 1] == 2) {
IP6CB(skb)->ra = optoff;
return 1;
}
LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
- skb->nh.raw[optoff+1]);
+ nh[optoff + 1]);
kfree_skb(skb);
return 0;
}
@@ -644,23 +682,24 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
{
struct sk_buff *skb = *skbp;
+ const unsigned char *nh = skb_network_header(skb);
u32 pkt_len;
- if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
+ if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
- skb->nh.raw[optoff+1]);
+ nh[optoff+1]);
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
goto drop;
}
- pkt_len = ntohl(*(__be32*)(skb->nh.raw+optoff+2));
+ pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
if (pkt_len <= IPV6_MAXPLEN) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
return 0;
}
- if (skb->nh.ipv6h->payload_len) {
+ if (ipv6_hdr(skb)->payload_len) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
return 0;
@@ -699,13 +738,14 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
struct inet6_skb_parm *opt = IP6CB(skb);
/*
- * skb->nh.raw is equal to skb->data, and
- * skb->h.raw - skb->nh.raw is always equal to
+ * skb_network_header(skb) is equal to skb->data, and
+ * skb_network_header_len(skb) is always equal to
* sizeof(struct ipv6hdr) by definition of
* hop-by-hop options.
*/
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
- !pskb_may_pull(skb, sizeof(struct ipv6hdr) + ((skb->h.raw[1] + 1) << 3))) {
+ !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
+ ((skb_transport_header(skb)[1] + 1) << 3)))) {
kfree_skb(skb);
return -1;
}
@@ -713,7 +753,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
opt->hop = sizeof(struct ipv6hdr);
if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
skb = *skbp;
- skb->h.raw += (skb->h.raw[1]+1)<<3;
+ skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
return 1;
@@ -782,6 +822,8 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
}
+EXPORT_SYMBOL(ipv6_push_nfrag_opts);
+
void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
{
if (opt->dst1opt)
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 0862809ffcf7..fc3882c90604 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -17,6 +17,7 @@
#include <net/fib_rules.h>
#include <net/ipv6.h>
+#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/netlink.h>
@@ -95,8 +96,27 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
if (table)
rt = lookup(table, flp, flags);
- if (rt != &ip6_null_entry)
+ if (rt != &ip6_null_entry) {
+ struct fib6_rule *r = (struct fib6_rule *)rule;
+
+ /*
+ * If we need to find a source address for this traffic,
+ * we check the result if it meets requirement of the rule.
+ */
+ if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+ r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+ struct in6_addr saddr;
+ if (ipv6_get_saddr(&rt->u.dst, &flp->fl6_dst,
+ &saddr))
+ goto again;
+ if (!ipv6_prefix_equal(&saddr, &r->src.addr,
+ r->src.plen))
+ goto again;
+ ipv6_addr_copy(&flp->fl6_src, &saddr);
+ }
goto out;
+ }
+again:
dst_release(&rt->u.dst);
rt = NULL;
goto out;
@@ -117,9 +137,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
return 0;
+ /*
+ * If FIB_RULE_FIND_SADDR is set and we do not have a
+ * source address for the traffic, we defer check for
+ * source address.
+ */
if (r->src.plen) {
- if (!(flags & RT6_LOOKUP_F_HAS_SADDR) ||
- !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
+ if (flags & RT6_LOOKUP_F_HAS_SADDR) {
+ if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr,
+ r->src.plen))
+ return 0;
+ } else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
return 0;
}
@@ -131,8 +159,6 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
FRA_GENERIC_POLICY,
- [FRA_SRC] = { .len = sizeof(struct in6_addr) },
- [FRA_DST] = { .len = sizeof(struct in6_addr) },
};
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
@@ -142,9 +168,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
- if (frh->src_len > 128 || frh->dst_len > 128)
- goto errout;
-
if (rule->action == FR_ACT_TO_TBL) {
if (rule->table == RT6_TABLE_UNSPEC)
goto errout;
@@ -155,11 +178,11 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
}
- if (tb[FRA_SRC])
+ if (frh->src_len)
nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
sizeof(struct in6_addr));
- if (tb[FRA_DST])
+ if (frh->dst_len)
nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
sizeof(struct in6_addr));
@@ -186,11 +209,11 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->tos && (rule6->tclass != frh->tos))
return 0;
- if (tb[FRA_SRC] &&
+ if (frh->src_len &&
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
return 0;
- if (tb[FRA_DST] &&
+ if (frh->dst_len &&
nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
return 0;
@@ -221,11 +244,6 @@ nla_put_failure:
return -ENOBUFS;
}
-int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- return fib_rules_dump(skb, cb, AF_INET6);
-}
-
static u32 fib6_rule_default_pref(void)
{
return 0x3FFF;
@@ -240,6 +258,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
static struct fib_rules_ops fib6_rules_ops = {
.family = AF_INET6,
.rule_size = sizeof(struct fib6_rule),
+ .addr_size = sizeof(struct in6_addr),
.action = fib6_rule_action,
.match = fib6_rule_match,
.configure = fib6_rule_configure,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index edfe98bf64c3..e9bcce9e7bdf 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
#include <asm/system.h>
DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
+EXPORT_SYMBOL(icmpv6_statistics);
/*
* The ICMP socket(s). This is the most convenient way to flow control
@@ -128,9 +129,9 @@ void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
static int is_ineligible(struct sk_buff *skb)
{
- int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
+ int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
int len = skb->len - ptr;
- __u8 nexthdr = skb->nh.ipv6h->nexthdr;
+ __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
if (len < 0)
return 1;
@@ -205,7 +206,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
{
u8 _optval, *op;
- offset += skb->nh.raw - skb->data;
+ offset += skb_network_offset(skb);
op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
if (op == NULL)
return 1;
@@ -221,7 +222,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
goto out;
- icmp6h = (struct icmp6hdr*) skb->h.raw;
+ icmp6h = icmp6_hdr(skb);
memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
icmp6h->icmp6_cksum = 0;
@@ -274,7 +275,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
#ifdef CONFIG_IPV6_MIP6
static void mip6_addr_swap(struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6_destopt_hao *hao;
struct in6_addr tmp;
@@ -283,7 +284,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
if (opt->dsthao) {
off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
if (likely(off >= 0)) {
- hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+ hao = (struct ipv6_destopt_hao *)
+ (skb_network_header(skb) + off);
ipv6_addr_copy(&tmp, &iph->saddr);
ipv6_addr_copy(&iph->saddr, &hao->addr);
ipv6_addr_copy(&hao->addr, &tmp);
@@ -301,7 +303,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
struct net_device *dev)
{
struct inet6_dev *idev = NULL;
- struct ipv6hdr *hdr = skb->nh.ipv6h;
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
struct sock *sk;
struct ipv6_pinfo *np;
struct in6_addr *saddr = NULL;
@@ -315,7 +317,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
int hlimit, tclass;
int err = 0;
- if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
+ if ((u8 *)hdr < skb->head ||
+ (skb->network_header + sizeof(*hdr)) > skb->tail)
return;
/*
@@ -430,7 +433,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
tclass = 0;
msg.skb = skb;
- msg.offset = skb->nh.raw - skb->data;
+ msg.offset = skb_network_offset(skb);
msg.type = type;
len = skb->len - msg.offset;
@@ -466,13 +469,15 @@ out:
icmpv6_xmit_unlock();
}
+EXPORT_SYMBOL(icmpv6_send);
+
static void icmpv6_echo_reply(struct sk_buff *skb)
{
struct sock *sk;
struct inet6_dev *idev;
struct ipv6_pinfo *np;
struct in6_addr *saddr = NULL;
- struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
+ struct icmp6hdr *icmph = icmp6_hdr(skb);
struct icmp6hdr tmp_hdr;
struct flowi fl;
struct icmpv6_msg msg;
@@ -481,7 +486,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
int hlimit;
int tclass;
- saddr = &skb->nh.ipv6h->daddr;
+ saddr = &ipv6_hdr(skb)->daddr;
if (!ipv6_unicast_destination(skb))
saddr = NULL;
@@ -491,7 +496,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
memset(&fl, 0, sizeof(fl));
fl.proto = IPPROTO_ICMPV6;
- ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
if (saddr)
ipv6_addr_copy(&fl.fl6_src, saddr);
fl.oif = skb->dev->ifindex;
@@ -579,8 +584,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
if (!pskb_may_pull(skb, inner_offset+8))
return;
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
Without this we will not able f.e. to make source routed
@@ -624,8 +629,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
/* Perform checksum. */
switch (skb->ip_summed) {
@@ -647,7 +652,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
goto discard_it;
- hdr = (struct icmp6hdr *) skb->h.raw;
+ hdr = icmp6_hdr(skb);
type = hdr->icmp6_type;
@@ -673,7 +678,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
*/
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto discard_it;
- hdr = (struct icmp6hdr *) skb->h.raw;
+ hdr = icmp6_hdr(skb);
orig_hdr = (struct ipv6hdr *) (hdr + 1);
rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
ntohl(hdr->icmp6_mtu));
@@ -727,7 +732,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
*/
icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
- };
+ }
+
kfree_skb(skb);
return 0;
@@ -860,11 +866,13 @@ int icmpv6_err_convert(int type, int code, int *err)
case ICMPV6_TIME_EXCEED:
*err = EHOSTUNREACH;
break;
- };
+ }
return fatal;
}
+EXPORT_SYMBOL(icmpv6_err_convert);
+
#ifdef CONFIG_SYSCTL
ctl_table ipv6_icmp_table[] = {
{
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f4d7be77eb0f..ca08ee88d07f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -359,7 +359,7 @@ end:
return res;
}
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
unsigned int h, s_h;
unsigned int e = 0, s_e;
@@ -658,6 +658,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
ins = &iter->u.dst.rt6_next;
}
+ /* Reset round-robin state, if necessary */
+ if (ins == &fn->leaf)
+ fn->rr_ptr = NULL;
+
/*
* insert node
*/
@@ -1109,6 +1113,10 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
rt6_stats.fib_rt_entries--;
rt6_stats.fib_discarded_routes++;
+ /* Reset round-robin state, if necessary */
+ if (fn->rr_ptr == rt)
+ fn->rr_ptr = NULL;
+
/* Adjust walkers */
read_lock(&fib6_walker_lock);
FOR_WALKERS(w) {
@@ -1478,6 +1486,8 @@ void __init fib6_init(void)
NULL, NULL);
fib6_tables_init();
+
+ __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
}
void fib6_gc_cleanup(void)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 11bfc7c43182..be0ee8a34f9b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -96,25 +96,27 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
goto err;
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
if (hdr->version != 6)
goto err;
- skb->h.raw = (u8 *)(hdr + 1);
+ skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
pkt_len = ntohs(hdr->payload_len);
/* pkt_len may be zero if Jumbo payload option is present */
if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
- if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
- goto truncated;
+ if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
+ }
if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
goto drop;
}
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
}
if (hdr->nexthdr == NEXTHDR_HOP) {
@@ -128,8 +130,6 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
rcu_read_unlock();
return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
-truncated:
- IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS);
err:
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
drop:
@@ -160,10 +160,10 @@ static inline int ip6_input_finish(struct sk_buff *skb)
rcu_read_lock();
resubmit:
idev = ip6_dst_idev(skb->dst);
- if (!pskb_pull(skb, skb->h.raw - skb->data))
+ if (!pskb_pull(skb, skb_transport_offset(skb)))
goto discard;
nhoff = IP6CB(skb)->nhoff;
- nexthdr = skb->nh.raw[nhoff];
+ nexthdr = skb_network_header(skb)[nhoff];
raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
@@ -181,9 +181,9 @@ resubmit:
indefinitely. */
nf_reset(skb);
- skb_postpull_rcsum(skb, skb->nh.raw,
- skb->h.raw - skb->nh.raw);
- hdr = skb->nh.ipv6h;
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+ hdr = ipv6_hdr(skb);
if (ipv6_addr_is_multicast(&hdr->daddr) &&
!ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
&hdr->saddr) &&
@@ -234,7 +234,7 @@ int ip6_mc_input(struct sk_buff *skb)
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS);
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 305516921aa8..f508171bab73 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -88,8 +88,8 @@ static inline int ip6_output_finish(struct sk_buff *skb)
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
- newskb->mac.raw = newskb->data;
- __skb_pull(newskb, newskb->nh.raw - newskb->data);
+ skb_reset_mac_header(newskb);
+ __skb_pull(newskb, skb_network_offset(newskb));
newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
BUG_TRAP(newskb->dst);
@@ -107,13 +107,13 @@ static int ip6_output2(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
- if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
struct inet6_dev *idev = ip6_dst_idev(skb->dst);
if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
- ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
- &skb->nh.ipv6h->saddr)) {
+ ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr)) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
@@ -124,7 +124,7 @@ static int ip6_output2(struct sk_buff *skb)
newskb->dev,
ip6_dev_loopback_xmit);
- if (skb->nh.ipv6h->hop_limit == 0) {
+ if (ipv6_hdr(skb)->hop_limit == 0) {
IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
}
+/*
+ * Return the MTU that an outgoing @skb is checked against.
+ *
+ * When the skb has an owning socket whose pmtudisc setting is
+ * IPV6_PMTUDISC_PROBE, the MTU of the route's output device is returned;
+ * in every other case (including skb->sk == NULL) the result is
+ * dst_mtu(skb->dst).
+ */
+static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+
+ return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
+ skb->dst->dev->mtu : dst_mtu(skb->dst);
+}
+
int ip6_output(struct sk_buff *skb)
{
- if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
+ if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb->dst))
return ip6_fragment(skb, ip6_output2);
else
@@ -191,7 +199,9 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
}
- hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ hdr = ipv6_hdr(skb);
/*
* Fill in the IPv6 header
@@ -239,6 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
return -EMSGSIZE;
}
+EXPORT_SYMBOL(ip6_xmit);
+
/*
* To avoid extra problems ND packets are send through this
* routine. It's code duplication but I really want to avoid
@@ -259,8 +271,9 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
totlen = len + sizeof(struct ipv6hdr);
- hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
- skb->nh.ipv6h = hdr;
+ skb_reset_network_header(skb);
+ skb_put(skb, sizeof(struct ipv6hdr));
+ hdr = ipv6_hdr(skb);
*(__be32*)hdr = htonl(0x60000000);
@@ -305,7 +318,7 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
- struct ipv6hdr *hdr = skb->nh.ipv6h;
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
u8 nexthdr = hdr->nexthdr;
int offset;
@@ -319,10 +332,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
if (nexthdr == IPPROTO_ICMPV6) {
struct icmp6hdr *icmp6;
- if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data))
+ if (!pskb_may_pull(skb, (skb_network_header(skb) +
+ offset + 1 - skb->data)))
return 0;
- icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset);
+ icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
switch (icmp6->icmp6_type) {
case NDISC_ROUTER_SOLICITATION:
@@ -361,7 +375,7 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
- struct ipv6hdr *hdr = skb->nh.ipv6h;
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
if (ipv6_devconf.forwarding == 0)
@@ -372,7 +386,7 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
}
- skb->ip_summed = CHECKSUM_NONE;
+ skb_forward_csum(skb);
/*
* We DO NOT make any processing on
@@ -388,7 +402,7 @@ int ip6_forward(struct sk_buff *skb)
* that different fragments will go along one path. --ANK
*/
if (opt->ra) {
- u8 *ptr = skb->nh.raw + opt->ra;
+ u8 *ptr = skb_network_header(skb) + opt->ra;
if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
return 0;
}
@@ -470,7 +484,7 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
}
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
/* Mangling hops number delayed to point after skb COW */
@@ -499,33 +513,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif
-#ifdef CONFIG_NETFILTER
- /* Connection association is same as pre-frag packet */
- nf_conntrack_put(to->nfct);
- to->nfct = from->nfct;
- nf_conntrack_get(to->nfct);
- to->nfctinfo = from->nfctinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- nf_conntrack_put_reasm(to->nfct_reasm);
- to->nfct_reasm = from->nfct_reasm;
- nf_conntrack_get_reasm(to->nfct_reasm);
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- nf_bridge_put(to->nf_bridge);
- to->nf_bridge = from->nf_bridge;
- nf_bridge_get(to->nf_bridge);
-#endif
-#endif
+ nf_copy(to, from);
skb_copy_secmark(to, from);
}
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
- unsigned int packet_len = skb->tail - skb->nh.raw;
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ unsigned int packet_len = skb->tail - skb->network_header;
int found_rhdr = 0;
- *nexthdr = &skb->nh.ipv6h->nexthdr;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
while (offset + 1 <= packet_len) {
@@ -550,7 +549,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
offset += ipv6_optlen(exthdr);
*nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
}
return offset;
@@ -574,7 +574,20 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
- mtu = dst_mtu(&rt->u.dst);
+ mtu = ip6_skb_dst_mtu(skb);
+
+ /* We must not fragment if the socket is set to force MTU discovery
+ * or if the skb is not generated by a local socket. (This last
+ * check should be redundant, but it's free.)
+ */
+ if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
+ skb->dev = skb->dst->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
if (np && np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
@@ -616,7 +629,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
/* BUILD HEADER */
*prevhdr = NEXTHDR_FRAGMENT;
- tmp_hdr = kmemdup(skb->nh.raw, hlen, GFP_ATOMIC);
+ tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
if (!tmp_hdr) {
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
return -ENOMEM;
@@ -624,8 +637,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
__skb_pull(skb, hlen);
fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
- skb->nh.raw = __skb_push(skb, hlen);
- memcpy(skb->nh.raw, tmp_hdr, hlen);
+ __skb_push(skb, hlen);
+ skb_reset_network_header(skb);
+ memcpy(skb_network_header(skb), tmp_hdr, hlen);
ipv6_select_ident(skb, fh);
fh->nexthdr = nexthdr;
@@ -636,7 +650,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
- skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+ ipv6_hdr(skb)->payload_len = htons(first_len -
+ sizeof(struct ipv6hdr));
dst_hold(&rt->u.dst);
@@ -645,10 +660,12 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
* before previous one went down. */
if (frag) {
frag->ip_summed = CHECKSUM_NONE;
- frag->h.raw = frag->data;
+ skb_reset_transport_header(frag);
fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
- frag->nh.raw = __skb_push(frag, hlen);
- memcpy(frag->nh.raw, tmp_hdr, hlen);
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), tmp_hdr,
+ hlen);
offset += skb->len - hlen - sizeof(struct frag_hdr);
fh->nexthdr = nexthdr;
fh->reserved = 0;
@@ -656,7 +673,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
if (frag->next != NULL)
fh->frag_off |= htons(IP6_MF);
fh->identification = frag_id;
- frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+ ipv6_hdr(frag)->payload_len =
+ htons(frag->len -
+ sizeof(struct ipv6hdr));
ip6_copy_metadata(frag, skb);
}
@@ -733,9 +752,10 @@ slow_path:
ip6_copy_metadata(frag, skb);
skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
skb_put(frag, len + hlen + sizeof(struct frag_hdr));
- frag->nh.raw = frag->data;
- fh = (struct frag_hdr*)(frag->data + hlen);
- frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
+ skb_reset_network_header(frag);
+ fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
+ frag->transport_header = (frag->network_header + hlen +
+ sizeof(struct frag_hdr));
/*
* Charge the memory for the fragment to any owner
@@ -747,7 +767,7 @@ slow_path:
/*
* Copy the packet header into the new buffer.
*/
- memcpy(frag->nh.raw, skb->data, hlen);
+ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
/*
* Build fragment header.
@@ -763,14 +783,15 @@ slow_path:
/*
* Copy a block of the IP datagram.
*/
- if (skb_copy_bits(skb, ptr, frag->h.raw, len))
+ if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
BUG();
left -= len;
fh->frag_off = htons(offset);
if (left > 0)
fh->frag_off |= htons(IP6_MF);
- frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+ ipv6_hdr(frag)->payload_len = htons(frag->len -
+ sizeof(struct ipv6hdr));
ptr += len;
offset += len;
@@ -861,6 +882,41 @@ static int ip6_dst_lookup_tail(struct sock *sk,
goto out_err_release;
}
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ /*
+ * Here if the dst entry we've looked up
+ * has a neighbour entry that is in the INCOMPLETE
+ * state and the src address from the flow is
+ * marked as OPTIMISTIC, we release the found
+ * dst entry and replace it instead with the
+ * dst entry of the nexthop router
+ */
+ if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
+ struct inet6_ifaddr *ifp;
+ struct flowi fl_gw;
+ int redirect;
+
+ ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
+
+ redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
+ if (ifp)
+ in6_ifa_put(ifp);
+
+ if (redirect) {
+ /*
+ * We need to get the dst entry for the
+ * default router instead
+ */
+ dst_release(*dst);
+ memcpy(&fl_gw, fl, sizeof(struct flowi));
+ memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
+ *dst = ip6_route_output(sk, &fl_gw);
+ if ((err = (*dst)->error))
+ goto out_err_release;
+ }
+ }
+#endif
+
return 0;
out_err_release:
@@ -939,10 +995,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb_put(skb,fragheaderlen + transhdrlen);
/* initialize network header pointer */
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* initialize protocol header pointer */
- skb->h.raw = skb->data + fragheaderlen;
+ skb->transport_header = skb->network_header + fragheaderlen;
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
@@ -1015,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
inet->cork.fl = *fl;
np->cork.hop_limit = hlimit;
np->cork.tclass = tclass;
- mtu = dst_mtu(rt->u.dst.path);
+ mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+ rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
@@ -1162,10 +1219,10 @@ alloc_new_skb:
* Find where to start putting bytes
*/
data = skb_put(skb, fraglen);
- skb->nh.raw = data + exthdrlen;
+ skb_set_network_header(skb, exthdrlen);
data += fragheaderlen;
- skb->h.raw = data + exthdrlen;
-
+ skb->transport_header = (skb->network_header +
+ fragheaderlen);
if (fraggap) {
skb->csum = skb_copy_and_csum_bits(
skb_prev, maxfraglen,
@@ -1288,10 +1345,10 @@ int ip6_push_pending_frames(struct sock *sk)
tail_skb = &(skb_shinfo(skb)->frag_list);
/* move skb->data to ip header from ext header */
- if (skb->data < skb->nh.raw)
- __skb_pull(skb, skb->nh.raw - skb->data);
+ if (skb->data < skb_network_header(skb))
+ __skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
- __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+ __skb_pull(tmp_skb, skb_network_header_len(skb));
*tail_skb = tmp_skb;
tail_skb = &(tmp_skb->next);
skb->len += tmp_skb->len;
@@ -1303,13 +1360,15 @@ int ip6_push_pending_frames(struct sock *sk)
}
ipv6_addr_copy(final_dst, &fl->fl6_dst);
- __skb_pull(skb, skb->h.raw - skb->nh.raw);
+ __skb_pull(skb, skb_network_header_len(skb));
if (opt && opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
if (opt && opt->opt_nflen)
ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
- skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ hdr = ipv6_hdr(skb);
*(__be32*)hdr = fl->fl6_flowlabel |
htonl(0x60000000 | ((int)np->cork.tclass << 20));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 08d944223ec8..a0902fbdb4e1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1,14 +1,15 @@
/*
- * IPv6 over IPv6 tunnel device
+ * IPv6 tunneling device
* Linux INET6 implementation
*
* Authors:
* Ville Nuorvala <vnuorval@tcs.hut.fi>
+ * Yasuyuki Kozakai <kozakai@linux-ipv6.org>
*
* $Id$
*
* Based on:
- * linux/net/ipv6/sit.c
+ * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
*
* RFC 2473
*
@@ -24,6 +25,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
+#include <linux/icmp.h>
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
@@ -41,6 +43,7 @@
#include <asm/uaccess.h>
#include <asm/atomic.h>
+#include <net/icmp.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -51,7 +54,7 @@
#include <net/inet_ecn.h>
MODULE_AUTHOR("Ville Nuorvala");
-MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
+MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
#define IPV6_TLV_TEL_DST_SIZE 8
@@ -63,6 +66,7 @@ MODULE_LICENSE("GPL");
#endif
#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT 20
#define HASH_SIZE 32
@@ -70,12 +74,12 @@ MODULE_LICENSE("GPL");
(addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
(HASH_SIZE - 1))
-static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
-static int ip6ip6_tnl_dev_init(struct net_device *dev);
-static void ip6ip6_tnl_dev_setup(struct net_device *dev);
+static int ip6_fb_tnl_dev_init(struct net_device *dev);
+static int ip6_tnl_dev_init(struct net_device *dev);
+static void ip6_tnl_dev_setup(struct net_device *dev);
/* the IPv6 tunnel fallback device */
-static struct net_device *ip6ip6_fb_tnl_dev;
+static struct net_device *ip6_fb_tnl_dev;
/* lists for storing tunnels in use */
@@ -84,7 +88,7 @@ static struct ip6_tnl *tnls_wc[1];
static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
/* lock for the tunnel lists */
-static DEFINE_RWLOCK(ip6ip6_lock);
+static DEFINE_RWLOCK(ip6_tnl_lock);
static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
@@ -115,7 +119,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
}
/**
- * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
* @remote: the address of the tunnel exit-point
* @local: the address of the tunnel entry-point
*
@@ -126,7 +130,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
**/
static struct ip6_tnl *
-ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
+ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(local);
@@ -145,18 +149,18 @@ ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
}
/**
- * ip6ip6_bucket - get head of list matching given tunnel parameters
+ * ip6_tnl_bucket - get head of list matching given tunnel parameters
* @p: parameters containing tunnel end-points
*
* Description:
- * ip6ip6_bucket() returns the head of the list matching the
+ * ip6_tnl_bucket() returns the head of the list matching the
* &struct in6_addr entries laddr and raddr in @p.
*
* Return: head of IPv6 tunnel list
**/
static struct ip6_tnl **
-ip6ip6_bucket(struct ip6_tnl_parm *p)
+ip6_tnl_bucket(struct ip6_tnl_parm *p)
{
struct in6_addr *remote = &p->raddr;
struct in6_addr *local = &p->laddr;
@@ -171,36 +175,36 @@ ip6ip6_bucket(struct ip6_tnl_parm *p)
}
/**
- * ip6ip6_tnl_link - add tunnel to hash table
+ * ip6_tnl_link - add tunnel to hash table
* @t: tunnel to be added
**/
static void
-ip6ip6_tnl_link(struct ip6_tnl *t)
+ip6_tnl_link(struct ip6_tnl *t)
{
- struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
+ struct ip6_tnl **tp = ip6_tnl_bucket(&t->parms);
t->next = *tp;
- write_lock_bh(&ip6ip6_lock);
+ write_lock_bh(&ip6_tnl_lock);
*tp = t;
- write_unlock_bh(&ip6ip6_lock);
+ write_unlock_bh(&ip6_tnl_lock);
}
/**
- * ip6ip6_tnl_unlink - remove tunnel from hash table
+ * ip6_tnl_unlink - remove tunnel from hash table
* @t: tunnel to be removed
**/
static void
-ip6ip6_tnl_unlink(struct ip6_tnl *t)
+ip6_tnl_unlink(struct ip6_tnl *t)
{
struct ip6_tnl **tp;
- for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) {
+ for (tp = ip6_tnl_bucket(&t->parms); *tp; tp = &(*tp)->next) {
if (t == *tp) {
- write_lock_bh(&ip6ip6_lock);
+ write_lock_bh(&ip6_tnl_lock);
*tp = t->next;
- write_unlock_bh(&ip6ip6_lock);
+ write_unlock_bh(&ip6_tnl_lock);
break;
}
}
@@ -237,12 +241,12 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
if (i == IP6_TNL_MAX)
goto failed;
}
- dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
+ dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
if (dev == NULL)
goto failed;
t = netdev_priv(dev);
- dev->init = ip6ip6_tnl_dev_init;
+ dev->init = ip6_tnl_dev_init;
t->parms = *p;
if ((err = register_netdevice(dev)) < 0) {
@@ -250,19 +254,19 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
goto failed;
}
dev_hold(dev);
- ip6ip6_tnl_link(t);
+ ip6_tnl_link(t);
return t;
failed:
return NULL;
}
/**
- * ip6ip6_tnl_locate - find or create tunnel matching given parameters
+ * ip6_tnl_locate - find or create tunnel matching given parameters
* @p: tunnel parameters
* @create: != 0 if allowed to create new tunnel if no match found
*
* Description:
- * ip6ip6_tnl_locate() first tries to locate an existing tunnel
+ * ip6_tnl_locate() first tries to locate an existing tunnel
* based on @parms. If this is unsuccessful, but @create is set a new
* tunnel device is created and registered for use.
*
@@ -270,13 +274,13 @@ failed:
* matching tunnel or NULL
**/
-static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
+static struct ip6_tnl *ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
{
struct in6_addr *remote = &p->raddr;
struct in6_addr *local = &p->laddr;
struct ip6_tnl *t;
- for (t = *ip6ip6_bucket(p); t; t = t->next) {
+ for (t = *ip6_tnl_bucket(p); t; t = t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
ipv6_addr_equal(remote, &t->parms.raddr))
return t;
@@ -287,24 +291,24 @@ static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
}
/**
- * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
+ * ip6_tnl_dev_uninit - tunnel device uninitializer
* @dev: the device to be destroyed
*
* Description:
- * ip6ip6_tnl_dev_uninit() removes tunnel from its list
+ * ip6_tnl_dev_uninit() removes tunnel from its list
**/
static void
-ip6ip6_tnl_dev_uninit(struct net_device *dev)
+ip6_tnl_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- if (dev == ip6ip6_fb_tnl_dev) {
- write_lock_bh(&ip6ip6_lock);
+ if (dev == ip6_fb_tnl_dev) {
+ write_lock_bh(&ip6_tnl_lock);
tnls_wc[0] = NULL;
- write_unlock_bh(&ip6ip6_lock);
+ write_unlock_bh(&ip6_tnl_lock);
} else {
- ip6ip6_tnl_unlink(t);
+ ip6_tnl_unlink(t);
}
ip6_tnl_dst_reset(t);
dev_put(dev);
@@ -372,16 +376,16 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
}
/**
- * ip6ip6_err - tunnel error handler
+ * ip6_tnl_err - tunnel error handler
*
* Description:
- * ip6ip6_err() should handle errors in the tunnel according
+ * ip6_tnl_err() should handle errors in the tunnel according
* to the specifications in RFC 2473.
**/
static int
-ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __be32 info)
+ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
+ int *type, int *code, int *msg, __be32 *info, int offset)
{
struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
struct ip6_tnl *t;
@@ -396,13 +400,16 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
in trouble since we might need the source address for further
processing of the error. */
- read_lock(&ip6ip6_lock);
- if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+ read_lock(&ip6_tnl_lock);
+ if ((t = ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+ goto out;
+
+ if (t->parms.proto != ipproto && t->parms.proto != 0)
goto out;
err = 0;
- switch (type) {
+ switch (*type) {
__u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
@@ -414,7 +421,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rel_msg = 1;
break;
case ICMPV6_TIME_EXCEED:
- if (code == ICMPV6_EXC_HOPLIMIT) {
+ if ((*code) == ICMPV6_EXC_HOPLIMIT) {
if (net_ratelimit())
printk(KERN_WARNING
"%s: Too small hop limit or "
@@ -425,10 +432,10 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
case ICMPV6_PARAMPROB:
teli = 0;
- if (code == ICMPV6_HDR_FIELD)
+ if ((*code) == ICMPV6_HDR_FIELD)
teli = parse_tlv_tnl_enc_lim(skb, skb->data);
- if (teli && teli == ntohl(info) - 2) {
+ if (teli && teli == ntohl(*info) - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
if (tel->encap_limit == 0) {
if (net_ratelimit())
@@ -445,7 +452,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
break;
case ICMPV6_PKT_TOOBIG:
- mtu = ntohl(info) - offset;
+ mtu = ntohl(*info) - offset;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
@@ -458,20 +465,144 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
break;
}
- if (rel_msg && pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
+
+ *type = rel_type;
+ *code = rel_code;
+ *info = rel_info;
+ *msg = rel_msg;
+
+out:
+ read_unlock(&ip6_tnl_lock);
+ return err;
+}
+
+/*
+ * Error handler for IPv4 packets carried in an IPv6 tunnel.
+ *
+ * ip6_tnl_err() is run first (with IPPROTO_IPIP); a negative result is
+ * returned unchanged. If it reports a message to relay (rel_msg != 0),
+ * the ICMPv6 type/code is translated to an ICMPv4 one:
+ *   ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH -> ICMP_DEST_UNREACH/ICMP_HOST_UNREACH
+ *   ICMPV6_PKT_TOOBIG/0                     -> ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED
+ * Anything else is silently dropped. A clone of @skb, pulled by @offset so
+ * that it starts at the inner IPv4 header, is then routed and handed to
+ * icmp_send().
+ *
+ * Returns the negative error from ip6_tnl_err(), otherwise 0.
+ */
+static int
+ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __u32 info)
+{
+ int rel_msg = 0;
+ int rel_type = type;
+ int rel_code = code;
+ __u32 rel_info = info;
+ int err;
+ struct sk_buff *skb2;
+ struct iphdr *eiph;
+ struct flowi fl;
+ struct rtable *rt;
+
+ err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
+ &rel_msg, &rel_info, offset);
+ if (err < 0)
+ return err;
+
+ if (rel_msg == 0)
+ return 0;
+
+ switch (rel_type) {
+ case ICMPV6_DEST_UNREACH:
+ if (rel_code != ICMPV6_ADDR_UNREACH)
+ return 0;
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_HOST_UNREACH;
+ break;
+ case ICMPV6_PKT_TOOBIG:
+ if (rel_code != 0)
+ return 0;
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_FRAG_NEEDED;
+ break;
+ default:
+ return 0;
+ }
+
+ if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
+ return 0;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
+ return 0;
+
+ dst_release(skb2->dst);
+ skb2->dst = NULL;
+ skb_pull(skb2, offset);
+ skb_reset_network_header(skb2);
+ eiph = ip_hdr(skb2);
+
+ /* Try to guess incoming interface */
+ memset(&fl, 0, sizeof(fl));
+ fl.fl4_dst = eiph->saddr;
+ fl.fl4_tos = RT_TOS(eiph->tos);
+ fl.proto = IPPROTO_IPIP;
+ if (ip_route_output_key(&rt, &fl))
+ goto out;
+
+ skb2->dev = rt->u.dst.dev;
+
+ /* route "incoming" packet */
+ if (rt->rt_flags & RTCF_LOCAL) {
+ ip_rt_put(rt);
+ rt = NULL;
+ fl.fl4_dst = eiph->daddr;
+ fl.fl4_src = eiph->saddr;
+ fl.fl4_tos = eiph->tos;
+ if (ip_route_output_key(&rt, &fl) ||
+ rt->u.dst.dev->type != ARPHRD_TUNNEL) {
+ ip_rt_put(rt);
+ goto out;
+ }
+ /*
+ * Attach the route to the clone: skb2->dst was cleared above and
+ * is dereferenced below. The reference is dropped together with
+ * skb2 by kfree_skb().
+ */
+ skb2->dst = &rt->u.dst;
+ } else {
+ ip_rt_put(rt);
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
+ skb2->dev) ||
+ skb2->dst->dev->type != ARPHRD_TUNNEL)
+ goto out;
+ }
+
+ /* change mtu on this route */
+ if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
+ if (rel_info > dst_mtu(skb2->dst))
+ goto out;
+
+ skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
+ rel_info = htonl(rel_info);
+ }
+
+ icmp_send(skb2, rel_type, rel_code, rel_info);
+
+out:
+ kfree_skb(skb2);
+ return 0;
+}
+
+static int
+ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __u32 info)
+{
+ int rel_msg = 0;
+ int rel_type = type;
+ int rel_code = code;
+ __u32 rel_info = info;
+ int err;
+
+ err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
+ &rel_msg, &rel_info, offset);
+ if (err < 0)
+ return err;
+
+ if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
struct rt6_info *rt;
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
- goto out;
+ return 0;
dst_release(skb2->dst);
skb2->dst = NULL;
skb_pull(skb2, offset);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
/* Try to guess incoming interface */
- rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
+ rt = rt6_lookup(&ipv6_hdr(skb2)->saddr, NULL, 0, 0);
if (rt && rt->rt6i_dev)
skb2->dev = rt->rt6i_dev;
@@ -483,19 +614,34 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
kfree_skb(skb2);
}
-out:
- read_unlock(&ip6ip6_lock);
- return err;
+
+ return 0;
}
-static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
- struct sk_buff *skb)
+/*
+ * Apply the outer IPv6 header's DS field to the inner IPv4 header on
+ * decapsulation: when the tunnel has IP6_TNL_F_RCV_DSCP_COPY set, the
+ * outer value (with its ECN bits cleared) is written via
+ * ipv4_change_dsfield(); independently, a CE mark on the outer header is
+ * set on the inner IPv4 header.
+ */
+static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
+ struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
{
- struct ipv6hdr *inner_iph = skb->nh.ipv6h;
+ __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
- if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
- IP6_ECN_set_ce(inner_iph);
+ if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+ ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
+
+ /* 'dsfield' has its ECN bits masked off, so test the raw outer value */
+ if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+ IP_ECN_set_ce(ip_hdr(skb));
+}
+
+/*
+ * IPv6-in-IPv6 counterpart used on decapsulation: when the tunnel has
+ * IP6_TNL_F_RCV_DSCP_COPY set, ipv6_copy_dscp() is called with the outer
+ * header @ipv6h and the inner header of @skb; if the outer DS field is
+ * CE-marked, CE is also set on the inner IPv6 header.
+ */
+static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
+ struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
+{
+ if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+ ipv6_copy_dscp(ipv6h, ipv6_hdr(skb));
+
+ if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+ IP6_ECN_set_ce(ipv6_hdr(skb));
}
+
static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
{
struct ip6_tnl_parm *p = &t->parms;
@@ -519,53 +665,61 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
}
/**
- * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
+ * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
* @skb: received socket buffer
+ * @protocol: ethernet protocol ID
+ * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
*
* Return: 0
**/
-static int
-ip6ip6_rcv(struct sk_buff *skb)
+static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
+ __u8 ipproto,
+ void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
+ struct ipv6hdr *ipv6h,
+ struct sk_buff *skb))
{
- struct ipv6hdr *ipv6h;
struct ip6_tnl *t;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- ipv6h = skb->nh.ipv6h;
+ read_lock(&ip6_tnl_lock);
- read_lock(&ip6ip6_lock);
+ if ((t = ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
+ if (t->parms.proto != ipproto && t->parms.proto != 0) {
+ read_unlock(&ip6_tnl_lock);
+ goto discard;
+ }
- if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- read_unlock(&ip6ip6_lock);
+ read_unlock(&ip6_tnl_lock);
goto discard;
}
if (!ip6_tnl_rcv_ctl(t)) {
t->stat.rx_dropped++;
- read_unlock(&ip6ip6_lock);
+ read_unlock(&ip6_tnl_lock);
goto discard;
}
secpath_reset(skb);
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
- skb->protocol = htons(ETH_P_IPV6);
+ skb->mac_header = skb->network_header;
+ skb_reset_network_header(skb);
+ skb->protocol = htons(protocol);
skb->pkt_type = PACKET_HOST;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
skb->dev = t->dev;
dst_release(skb->dst);
skb->dst = NULL;
nf_reset(skb);
- if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
- ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
- ip6ip6_ecn_decapsulate(ipv6h, skb);
+
+ dscp_ecn_decapsulate(t, ipv6h, skb);
+
t->stat.rx_packets++;
t->stat.rx_bytes += skb->len;
netif_rx(skb);
- read_unlock(&ip6ip6_lock);
+ read_unlock(&ip6_tnl_lock);
return 0;
}
- read_unlock(&ip6ip6_lock);
+ read_unlock(&ip6_tnl_lock);
return 1;
discard:
@@ -573,6 +727,18 @@ discard:
return 0;
}
+/*
+ * Receive handler for IPv4 payloads carried over the IPv6 tunnel: forwards
+ * the skb to the shared ip6_tnl_rcv() path with the IPv4 ethertype, the
+ * IPIP protocol number and the IPv4 DSCP/ECN decapsulation callback, and
+ * passes ip6_tnl_rcv()'s result straight back to the caller.
+ */
+static int ip4ip6_rcv(struct sk_buff *skb)
+{
+ int ret;
+
+ ret = ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
+ ip4ip6_dscp_ecn_decapsulate);
+ return ret;
+}
+
+/*
+ * Receive handler for IPv6 payloads carried over the IPv6 tunnel: forwards
+ * the skb to the shared ip6_tnl_rcv() path with the IPv6 ethertype, the
+ * IPv6-in-IPv6 protocol number and the IPv6 DSCP/ECN decapsulation
+ * callback, and passes ip6_tnl_rcv()'s result straight back to the caller.
+ */
+static int ip6ip6_rcv(struct sk_buff *skb)
+{
+ int ret;
+
+ ret = ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
+ ip6ip6_dscp_ecn_decapsulate);
+ return ret;
+}
+
struct ipv6_tel_txoption {
struct ipv6_txoptions ops;
__u8 dst_opt[8];
@@ -593,7 +759,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
}
/**
- * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
* @t: the outgoing tunnel device
* @hdr: IPv6 header from the incoming packet
*
@@ -607,7 +773,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
**/
static inline int
-ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
+ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
{
+ /* non-zero when the packet's source address equals the tunnel's remote address */
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
@@ -641,72 +807,49 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
return ret;
}
/**
- * ip6ip6_tnl_xmit - encapsulate packet and send
+ * ip6_tnl_xmit2 - encapsulate packet and send
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
+ * @dsfield: dscp code for outer header
+ * @fl: flow of tunneled packet
+ * @encap_limit: encapsulation limit
+ * @pmtu: Path MTU is stored if packet is too big
*
* Description:
* Build new header and do some sanity checks on the packet before sending
* it.
*
* Return:
- * 0
+ * 0 on success
+ * -1 on failure
+ * %-EMSGSIZE if the packet is too big; the path MTU is stored in @pmtu
**/
-static int
-ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+static int ip6_tnl_xmit2(struct sk_buff *skb,
+ struct net_device *dev,
+ __u8 dsfield,
+ struct flowi *fl,
+ int encap_limit,
+ __u32 *pmtu)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->stat;
- struct ipv6hdr *ipv6h = skb->nh.ipv6h;
- int encap_limit = -1;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ipv6_tel_txoption opt;
- __u16 offset;
- struct flowi fl;
struct dst_entry *dst;
struct net_device *tdev;
int mtu;
int max_headroom = sizeof(struct ipv6hdr);
u8 proto;
- int err;
+ int err = -1;
int pkt_len;
- int dsfield;
-
- if (t->recursion++) {
- stats->collisions++;
- goto tx_err;
- }
- if (skb->protocol != htons(ETH_P_IPV6) ||
- !ip6_tnl_xmit_ctl(t) || ip6ip6_tnl_addr_conflict(t, ipv6h))
- goto tx_err;
-
- if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
- if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2, skb->dev);
- goto tx_err;
- }
- encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- memcpy(&fl, &t->fl, sizeof (fl));
- proto = fl.proto;
-
- dsfield = ipv6_get_dsfield(ipv6h);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
- fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
- fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
if ((dst = ip6_tnl_dst_check(t)) != NULL)
dst_hold(dst);
else {
- dst = ip6_route_output(NULL, &fl);
+ dst = ip6_route_output(NULL, fl);
- if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
+ if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0)
goto tx_err_link_failure;
}
@@ -730,7 +873,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (skb->dst)
skb->dst->ops->update_pmtu(skb->dst, mtu);
if (skb->len > mtu) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ *pmtu = mtu;
+ err = -EMSGSIZE;
goto tx_err_dst_release;
}
@@ -754,22 +898,24 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
dst_release(skb->dst);
skb->dst = dst_clone(dst);
- skb->h.raw = skb->nh.raw;
+ skb->transport_header = skb->network_header;
+ proto = fl->proto;
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
}
- skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
- ipv6h = skb->nh.ipv6h;
- *(__be32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ipv6h = ipv6_hdr(skb);
+ *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
dsfield = INET_ECN_encapsulate(0, dsfield);
ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
- ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
- ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
+ ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
+ ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
nf_reset(skb);
pkt_len = skb->len;
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
@@ -783,13 +929,131 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
stats->tx_aborted_errors++;
}
ip6_tnl_dst_store(t, dst);
- t->recursion--;
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
dst_release(dst);
+ return err;
+}
+
+/**
+ * ip4ip6_tnl_xmit - encapsulate an IPv4 packet and send it through the tunnel
+ * @skb: the outgoing socket buffer (its network header is read as IPv4)
+ * @dev: the outgoing tunnel device
+ *
+ * Description:
+ * Copies the tunnel's flow template, marks it as IPIP, optionally inherits
+ * the inner TOS as the outer traffic class and hands the packet to
+ * ip6_tnl_xmit2(). When ip6_tnl_xmit2() reports %-EMSGSIZE an ICMP
+ * "fragmentation needed" error carrying the reported MTU is sent.
+ *
+ * Return:
+ * 0 on success
+ * -1 on failure
+ **/
+static inline int
+ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct iphdr *iph = ip_hdr(skb);
+ int encap_limit = -1;
+ struct flowi fl;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ /* the tunnel must be set up for IPIP payloads; a proto of 0 is accepted too */
+ if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t))
+ return -1;
+
+ /* encap_limit stays -1 when the tunnel is told to ignore the limit */
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl, &t->fl, sizeof (fl));
+ fl.proto = IPPROTO_IPIP;
+
+ dsfield = ipv4_get_dsfield(iph);
+
+ /*
+ * IPV6_TCLASS_MASK is applied to raw network-order header words
+ * elsewhere in this file, so the shifted TOS must be converted to
+ * network byte order before it is masked, not after.
+ */
+ if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+ fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+ & IPV6_TCLASS_MASK;
+
+ err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+ if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
+ /* mtu is only read here, i.e. after ip6_tnl_xmit2() has stored it */
+ if (err == -EMSGSIZE)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * ip6ip6_tnl_xmit - encapsulate an IPv6 packet and send it through the tunnel
+ * @skb: the outgoing socket buffer (its network header is read as IPv6)
+ * @dev: the outgoing tunnel device
+ *
+ * Description:
+ * Validates the tunnel configuration and the packet addresses, derives the
+ * encapsulation limit, builds the flow from the tunnel's template
+ * (optionally inheriting traffic class and flow label from the inner
+ * header) and hands the packet to ip6_tnl_xmit2(). An ICMPv6 "packet too
+ * big" error is sent when ip6_tnl_xmit2() reports %-EMSGSIZE.
+ *
+ * Return:
+ * 0 on success
+ * -1 on failure
+ **/
+static inline int
+ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct flowi fl;
+ int encap_limit = -1;
+ __u16 tel_off;
+ __u8 dsfield;
+ __u32 mtu;
+ int rc;
+
+ /* the tunnel must be set up for IPv6 payloads; a proto of 0 is accepted too */
+ if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0)
+ return -1;
+ if (!ip6_tnl_xmit_ctl(t))
+ return -1;
+ if (ip6_tnl_addr_conflict(t, ipv6h))
+ return -1;
+
+ /* a non-zero offset locates an encapsulation limit option inside the packet */
+ tel_off = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+ if (tel_off > 0) {
+ struct ipv6_tlv_tnl_enc_lim *lim;
+
+ lim = (struct ipv6_tlv_tnl_enc_lim *)(skb_network_header(skb) + tel_off);
+ if (lim->encap_limit == 0) {
+ /* limit exhausted: report a parameter problem and drop */
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, tel_off + 2, skb->dev);
+ return -1;
+ }
+ encap_limit = lim->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ encap_limit = t->parms.encap_limit;
+ }
+
+ memcpy(&fl, &t->fl, sizeof (fl));
+ fl.proto = IPPROTO_IPV6;
+
+ dsfield = ipv6_get_dsfield(ipv6h);
+ /* the first 32-bit word of the inner header carries traffic class and flow label */
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+
+ rc = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+ if (rc == 0)
+ return 0;
+
+ /* mtu is only read after ip6_tnl_xmit2() has stored it */
+ if (rc == -EMSGSIZE)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ return -1;
+}
+
+static int
+ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->stat;
+ int ret;
+
+ if (t->recursion++) {
+ t->stat.collisions++;
+ goto tx_err;
+ }
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ ret = ip4ip6_tnl_xmit(skb, dev);
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ ret = ip6ip6_tnl_xmit(skb, dev);
+ break;
+ default:
+ goto tx_err;
+ }
+
+ if (ret < 0)
+ goto tx_err;
+
+ t->recursion--;
+ return 0;
+
tx_err:
stats->tx_errors++;
stats->tx_dropped++;
@@ -817,7 +1081,7 @@ static void ip6_tnl_set_cap(struct ip6_tnl *t)
}
}
-static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
+static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
struct ip6_tnl_parm *p = &t->parms;
@@ -870,17 +1134,17 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
}
/**
- * ip6ip6_tnl_change - update the tunnel parameters
+ * ip6_tnl_change - update the tunnel parameters
* @t: tunnel to be changed
* @p: tunnel configuration parameters
* @active: != 0 if tunnel is ready for use
*
* Description:
- * ip6ip6_tnl_change() updates the tunnel parameters
+ * ip6_tnl_change() updates the tunnel parameters
**/
static int
-ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
{
ipv6_addr_copy(&t->parms.laddr, &p->laddr);
ipv6_addr_copy(&t->parms.raddr, &p->raddr);
@@ -889,19 +1153,20 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
t->parms.encap_limit = p->encap_limit;
t->parms.flowinfo = p->flowinfo;
t->parms.link = p->link;
+ t->parms.proto = p->proto;
ip6_tnl_dst_reset(t);
- ip6ip6_tnl_link_config(t);
+ ip6_tnl_link_config(t);
return 0;
}
/**
- * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
* @dev: virtual device associated with tunnel
* @ifr: parameters passed from userspace
* @cmd: command to be performed
*
* Description:
- * ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels
+ * ip6_tnl_ioctl() is used for managing IPv6 tunnels
* from userspace.
*
* The possible commands are the following:
@@ -923,7 +1188,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
**/
static int
-ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip6_tnl_parm p;
@@ -931,12 +1196,12 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGETTUNNEL:
- if (dev == ip6ip6_fb_tnl_dev) {
+ if (dev == ip6_fb_tnl_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
err = -EFAULT;
break;
}
- t = ip6ip6_tnl_locate(&p, 0);
+ t = ip6_tnl_locate(&p, 0);
}
if (t == NULL)
t = netdev_priv(dev);
@@ -954,10 +1219,11 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
err = -EINVAL;
- if (p.proto != IPPROTO_IPV6)
+ if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
+ p.proto != 0)
break;
- t = ip6ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
- if (dev != ip6ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+ t = ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
+ if (dev != ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
err = -EEXIST;
@@ -966,9 +1232,9 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
} else
t = netdev_priv(dev);
- ip6ip6_tnl_unlink(t);
- err = ip6ip6_tnl_change(t, &p);
- ip6ip6_tnl_link(t);
+ ip6_tnl_unlink(t);
+ err = ip6_tnl_change(t, &p);
+ ip6_tnl_link(t);
netdev_state_change(dev);
}
if (t) {
@@ -984,15 +1250,15 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!capable(CAP_NET_ADMIN))
break;
- if (dev == ip6ip6_fb_tnl_dev) {
+ if (dev == ip6_fb_tnl_dev) {
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
err = -ENOENT;
- if ((t = ip6ip6_tnl_locate(&p, 0)) == NULL)
+ if ((t = ip6_tnl_locate(&p, 0)) == NULL)
break;
err = -EPERM;
- if (t->dev == ip6ip6_fb_tnl_dev)
+ if (t->dev == ip6_fb_tnl_dev)
break;
dev = t->dev;
}
@@ -1006,20 +1272,20 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
/**
- * ip6ip6_tnl_get_stats - return the stats for tunnel device
+ * ip6_tnl_get_stats - return the stats for tunnel device
* @dev: virtual device associated with tunnel
*
* Return: stats for device
**/
static struct net_device_stats *
-ip6ip6_tnl_get_stats(struct net_device *dev)
+ip6_tnl_get_stats(struct net_device *dev)
{
return &(((struct ip6_tnl *)netdev_priv(dev))->stat);
}
/**
- * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
+ * ip6_tnl_change_mtu - change mtu manually for tunnel device
* @dev: virtual device associated with tunnel
* @new_mtu: the new mtu
*
@@ -1029,7 +1295,7 @@ ip6ip6_tnl_get_stats(struct net_device *dev)
**/
static int
-ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
if (new_mtu < IPV6_MIN_MTU) {
return -EINVAL;
@@ -1039,22 +1305,22 @@ ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
}
/**
- * ip6ip6_tnl_dev_setup - setup virtual tunnel device
+ * ip6_tnl_dev_setup - setup virtual tunnel device
* @dev: virtual device associated with tunnel
*
* Description:
* Initialize function pointers and device parameters
**/
-static void ip6ip6_tnl_dev_setup(struct net_device *dev)
+static void ip6_tnl_dev_setup(struct net_device *dev)
{
SET_MODULE_OWNER(dev);
- dev->uninit = ip6ip6_tnl_dev_uninit;
+ dev->uninit = ip6_tnl_dev_uninit;
dev->destructor = free_netdev;
- dev->hard_start_xmit = ip6ip6_tnl_xmit;
- dev->get_stats = ip6ip6_tnl_get_stats;
- dev->do_ioctl = ip6ip6_tnl_ioctl;
- dev->change_mtu = ip6ip6_tnl_change_mtu;
+ dev->hard_start_xmit = ip6_tnl_xmit;
+ dev->get_stats = ip6_tnl_get_stats;
+ dev->do_ioctl = ip6_tnl_ioctl;
+ dev->change_mtu = ip6_tnl_change_mtu;
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
@@ -1065,50 +1331,56 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev)
/**
- * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
+ * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
* @dev: virtual device associated with tunnel
**/
static inline void
-ip6ip6_tnl_dev_init_gen(struct net_device *dev)
+ip6_tnl_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- t->fl.proto = IPPROTO_IPV6;
+ /* fl.proto is no longer preset here; ip4ip6_tnl_xmit() and ip6ip6_tnl_xmit() set it per packet */
t->dev = dev;
+ /* keep the tunnel parameters' name in step with the net device name */
strcpy(t->parms.name, dev->name);
}
/**
- * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
+ * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
* @dev: virtual device associated with tunnel
**/
static int
-ip6ip6_tnl_dev_init(struct net_device *dev)
+ip6_tnl_dev_init(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- ip6ip6_tnl_dev_init_gen(dev);
- ip6ip6_tnl_link_config(t);
+ ip6_tnl_dev_init_gen(dev);
+ ip6_tnl_link_config(t);
return 0;
}
/**
- * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
* @dev: fallback device
*
* Return: 0
**/
static int
-ip6ip6_fb_tnl_dev_init(struct net_device *dev)
+ip6_fb_tnl_dev_init(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- ip6ip6_tnl_dev_init_gen(dev);
+ ip6_tnl_dev_init_gen(dev);
+ /* the fallback device is explicitly configured for IPv6 payloads */
+ t->parms.proto = IPPROTO_IPV6;
+ /* NOTE(review): presumably this reference keeps the fallback device pinned - confirm where it is dropped */
dev_hold(dev);
+ /* NOTE(review): tnls_wc[0] looks like the wildcard (catch-all) tunnel slot - confirm against ip6_tnl_lookup() */
tnls_wc[0] = t;
return 0;
}
+/*
+ * Tunnel handler for IPv4-in-IPv6 packets; ip6_tunnel_init() registers it
+ * for AF_INET and ip6_tunnel_cleanup() deregisters it.
+ */
+static struct xfrm6_tunnel ip4ip6_handler = {
+ .handler = ip4ip6_rcv,
+ .err_handler = ip4ip6_err,
+ .priority = 1,
+};
+
static struct xfrm6_tunnel ip6ip6_handler = {
.handler = ip6ip6_rcv,
.err_handler = ip6ip6_err,
@@ -1125,30 +1397,40 @@ static int __init ip6_tunnel_init(void)
{
int err;
+ if (xfrm6_tunnel_register(&ip4ip6_handler, AF_INET)) {
+ printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
+ err = -EAGAIN;
+ goto out;
+ }
+
if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) {
- printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
- return -EAGAIN;
+ printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
+ err = -EAGAIN;
+ goto unreg_ip4ip6;
}
- ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
- ip6ip6_tnl_dev_setup);
+ ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
+ ip6_tnl_dev_setup);
- if (!ip6ip6_fb_tnl_dev) {
+ if (!ip6_fb_tnl_dev) {
err = -ENOMEM;
goto fail;
}
- ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;
+ ip6_fb_tnl_dev->init = ip6_fb_tnl_dev_init;
- if ((err = register_netdev(ip6ip6_fb_tnl_dev))) {
- free_netdev(ip6ip6_fb_tnl_dev);
+ if ((err = register_netdev(ip6_fb_tnl_dev))) {
+ free_netdev(ip6_fb_tnl_dev);
goto fail;
}
return 0;
fail:
xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
+unreg_ip4ip6:
+ xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
+out:
return err;
}
-static void __exit ip6ip6_destroy_tunnels(void)
+static void __exit ip6_tnl_destroy_tunnels(void)
{
int h;
struct ip6_tnl *t;
@@ -1168,11 +1450,14 @@ static void __exit ip6ip6_destroy_tunnels(void)
+/*
+ * Module exit: deregister both tunnel handlers (a failure is only logged),
+ * then destroy all tunnels while holding the RTNL lock.
+ */
static void __exit ip6_tunnel_cleanup(void)
{
+ if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
+ printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
+
if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
- printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
+ printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
rtnl_lock();
- ip6ip6_destroy_tunnels();
+ ip6_tnl_destroy_tunnels();
rtnl_unlock();
}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5724ba9f75de..1ee50b5782e1 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -79,9 +79,9 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
/* Remove ipcomp header and decompress original payload */
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
ipch = (void *)skb->data;
- skb->h.raw = skb->nh.raw + sizeof(*ipch);
+ skb->transport_header = skb->network_header + sizeof(*ipch);
__skb_pull(skb, sizeof(*ipch));
/* decompression */
@@ -111,7 +111,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
skb->truesize += dlen - plen;
__skb_put(skb, dlen - plen);
- memcpy(skb->data, scratch, dlen);
+ skb_copy_to_linear_data(skb, scratch, dlen);
err = ipch->nexthdr;
out_put_cpu:
@@ -124,15 +124,13 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
struct ipv6hdr *top_iph;
- int hdr_len;
struct ipv6_comp_hdr *ipch;
struct ipcomp_data *ipcd = x->data;
int plen, dlen;
u8 *start, *scratch;
struct crypto_comp *tfm;
int cpu;
-
- hdr_len = skb->h.raw - skb->data;
+ int hdr_len = skb_transport_offset(skb);
/* check whether datagram len is larger than threshold */
if ((skb->len - hdr_len) < ipcd->threshold) {
@@ -145,7 +143,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
/* compression */
plen = skb->len - hdr_len;
dlen = IPCOMP_SCRATCH_SIZE;
- start = skb->h.raw;
+ start = skb_transport_header(skb);
cpu = get_cpu();
scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
@@ -166,10 +164,10 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
ipch = (struct ipv6_comp_hdr *)start;
- ipch->nexthdr = *skb->nh.raw;
+ ipch->nexthdr = *skb_network_header(skb);
ipch->flags = 0;
ipch->cpi = htons((u16 )ntohl(x->id.spi));
- *skb->nh.raw = IPPROTO_COMP;
+ *skb_network_header(skb) = IPPROTO_COMP;
out_ok:
return 0;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4e0561a082d0..aa3d07c52a8f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -101,14 +101,14 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
- ipv6h = skb->nh.ipv6h;
+ ipv6h = ipv6_hdr(skb);
__skb_pull(skb, sizeof(*ipv6h));
err = -EPROTONOSUPPORT;
rcu_read_lock();
ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
if (likely(ops && ops->gso_send_check)) {
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = ops->gso_send_check(skb);
}
rcu_read_unlock();
@@ -137,14 +137,14 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
- ipv6h = skb->nh.ipv6h;
+ ipv6h = ipv6_hdr(skb);
__skb_pull(skb, sizeof(*ipv6h));
segs = ERR_PTR(-EPROTONOSUPPORT);
rcu_read_lock();
ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
if (likely(ops && ops->gso_segment)) {
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
segs = ops->gso_segment(skb, features);
}
rcu_read_unlock();
@@ -153,7 +153,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
goto out;
for (skb = segs; skb; skb = skb->next) {
- ipv6h = skb->nh.ipv6h;
+ ipv6h = ipv6_hdr(skb);
ipv6h->payload_len = htons(skb->len - skb->mac_len -
sizeof(*ipv6h));
}
@@ -413,7 +413,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
}
/* routing header option needs extra check */
- if (optname == IPV6_RTHDR && opt->srcrt) {
+ if (optname == IPV6_RTHDR && opt && opt->srcrt) {
struct ipv6_rt_hdr *rthdr = opt->srcrt;
switch (rthdr->type) {
case IPV6_SRCRT_TYPE_0:
@@ -694,7 +694,7 @@ done:
retv = ip6_ra_control(sk, val, NULL);
break;
case IPV6_MTU_DISCOVER:
- if (val<0 || val>2)
+ if (val<0 || val>3)
goto e_inval;
np->pmtudisc = val;
retv = 0;
@@ -761,6 +761,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
return err;
}
+EXPORT_SYMBOL(ipv6_setsockopt);
#ifdef CONFIG_COMPAT
int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
@@ -796,18 +797,37 @@ EXPORT_SYMBOL(compat_ipv6_setsockopt);
#endif
+/*
+ * Copy one sticky extension header from @opt to user space.
+ *
+ * @optname selects the header: IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR
+ * or IPV6_DSTOPTS. At most @len bytes are copied to @optval.
+ *
+ * Returns 0 when @opt is NULL or the selected header is not set, -EINVAL
+ * for any other @optname, -EFAULT when the copy to user space fails, and
+ * otherwise the full length of the header (ipv6_optlen()), which may be
+ * larger than the number of bytes actually copied.
+ */
static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
- char __user *optval, int len)
+ int optname, char __user *optval, int len)
{
struct ipv6_opt_hdr *hdr;
- if (!opt || !opt->hopopt)
+ if (!opt)
return 0;
- hdr = opt->hopopt;
- len = min_t(int, len, ipv6_optlen(hdr));
- if (copy_to_user(optval, hdr, ipv6_optlen(hdr)))
+ switch(optname) {
+ case IPV6_HOPOPTS:
+ hdr = opt->hopopt;
+ break;
+ case IPV6_RTHDRDSTOPTS:
+ hdr = opt->dst0opt;
+ break;
+ case IPV6_RTHDR:
+ hdr = (struct ipv6_opt_hdr *)opt->srcrt;
+ break;
+ case IPV6_DSTOPTS:
+ hdr = opt->dst1opt;
+ break;
+ default:
+ return -EINVAL; /* should not happen */
+ }
+
+ if (!hdr)
+ return 0;
+
+ len = min_t(unsigned int, len, ipv6_optlen(hdr));
+ /* no ';' after the condition: -EFAULT must only be returned when the copy fails */
+ if (copy_to_user(optval, hdr, len))
return -EFAULT;
- return len;
+ return ipv6_optlen(hdr);
}
static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
@@ -945,7 +965,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
lock_sock(sk);
len = ipv6_getsockopt_sticky(sk, np->opt,
- optval, len);
+ optname, optval, len);
release_sock(sk);
return put_user(len, optlen);
}
@@ -1066,6 +1086,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
return err;
}
+EXPORT_SYMBOL(ipv6_getsockopt);
+
#ifdef CONFIG_COMPAT
int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
deleted file mode 100644
index e12e3d4fccec..000000000000
--- a/net/ipv6/ipv6_syms.c
+++ /dev/null
@@ -1,36 +0,0 @@
-
-#include <linux/module.h>
-#include <net/protocol.h>
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-#include <net/ip6_route.h>
-#include <net/xfrm.h>
-
-EXPORT_SYMBOL(icmpv6_send);
-EXPORT_SYMBOL(icmpv6_statistics);
-EXPORT_SYMBOL(icmpv6_err_convert);
-EXPORT_SYMBOL(ndisc_mc_map);
-EXPORT_SYMBOL(register_inet6addr_notifier);
-EXPORT_SYMBOL(unregister_inet6addr_notifier);
-EXPORT_SYMBOL(ip6_route_output);
-EXPORT_SYMBOL(ipv6_setsockopt);
-EXPORT_SYMBOL(ipv6_getsockopt);
-EXPORT_SYMBOL(inet6_register_protosw);
-EXPORT_SYMBOL(inet6_unregister_protosw);
-EXPORT_SYMBOL(inet6_add_protocol);
-EXPORT_SYMBOL(inet6_del_protocol);
-EXPORT_SYMBOL(ip6_xmit);
-EXPORT_SYMBOL(inet6_release);
-EXPORT_SYMBOL(inet6_bind);
-EXPORT_SYMBOL(inet6_getname);
-EXPORT_SYMBOL(inet6_ioctl);
-EXPORT_SYMBOL(ipv6_get_saddr);
-EXPORT_SYMBOL(ipv6_chk_addr);
-EXPORT_SYMBOL(in6_dev_finish_destroy);
-#ifdef CONFIG_XFRM
-EXPORT_SYMBOL(xfrm6_rcv);
-EXPORT_SYMBOL(xfrm6_input_addr);
-EXPORT_SYMBOL(xfrm6_find_1stfragopt);
-#endif
-EXPORT_SYMBOL(rt6_lookup);
-EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a8d6625ec782..3e308fb41b49 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -988,7 +988,7 @@ int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
return 0;
- pic = (struct icmp6hdr *)skb->h.raw;
+ pic = icmp6_hdr(skb);
switch (pic->icmp6_type) {
case ICMPV6_MGM_QUERY:
@@ -1167,11 +1167,11 @@ int igmp6_event_query(struct sk_buff *skb)
return -EINVAL;
/* compute payload length excluding extension headers */
- len = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
- len -= (char *)skb->h.raw - (char *)skb->nh.ipv6h;
+ len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
+ len -= skb_network_header_len(skb);
/* Drop queries with not link local source */
- if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
return -EINVAL;
idev = in6_dev_get(skb->dev);
@@ -1179,7 +1179,7 @@ int igmp6_event_query(struct sk_buff *skb)
if (idev == NULL)
return 0;
- hdr = (struct icmp6hdr *) skb->h.raw;
+ hdr = icmp6_hdr(skb);
group = (struct in6_addr *) (hdr + 1);
group_type = ipv6_addr_type(group);
@@ -1212,7 +1212,7 @@ int igmp6_event_query(struct sk_buff *skb)
in6_dev_put(idev);
return -EINVAL;
}
- mlh2 = (struct mld2_query *) skb->h.raw;
+ mlh2 = (struct mld2_query *)skb_transport_header(skb);
max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
if (!max_delay)
max_delay = 1;
@@ -1235,7 +1235,7 @@ int igmp6_event_query(struct sk_buff *skb)
in6_dev_put(idev);
return -EINVAL;
}
- mlh2 = (struct mld2_query *) skb->h.raw;
+ mlh2 = (struct mld2_query *)skb_transport_header(skb);
mark = 1;
}
} else {
@@ -1300,10 +1300,10 @@ int igmp6_event_report(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
return -EINVAL;
- hdr = (struct icmp6hdr*) skb->h.raw;
+ hdr = icmp6_hdr(skb);
/* Drop reports with not link local source */
- addr_type = ipv6_addr_type(&skb->nh.ipv6h->saddr);
+ addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
if (addr_type != IPV6_ADDR_ANY &&
!(addr_type&IPV6_ADDR_LINKLOCAL))
return -EINVAL;
@@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- if (ipv6_get_lladdr(dev, &addr_buf)) {
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
@@ -1423,8 +1423,9 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
- pmr =(struct mld2_report *)skb_put(skb, sizeof(*pmr));
- skb->h.raw = (unsigned char *)pmr;
+ skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
+ skb_put(skb, sizeof(*pmr));
+ pmr = (struct mld2_report *)skb_transport_header(skb);
pmr->type = ICMPV6_MLD2_REPORT;
pmr->resv1 = 0;
pmr->csum = 0;
@@ -1441,7 +1442,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
unsigned char ha[MAX_ADDR_LEN];
int err;
- ndisc_mc_map(&skb->nh.ipv6h->daddr, ha, dev, 1);
+ ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1);
err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len);
if (err < 0) {
kfree_skb(skb);
@@ -1459,20 +1460,21 @@ static inline int mld_dev_queue_xmit(struct sk_buff *skb)
static void mld_sendpack(struct sk_buff *skb)
{
- struct ipv6hdr *pip6 = skb->nh.ipv6h;
- struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
+ struct ipv6hdr *pip6 = ipv6_hdr(skb);
+ struct mld2_report *pmr =
+ (struct mld2_report *)skb_transport_header(skb);
int payload_len, mldlen;
struct inet6_dev *idev = in6_dev_get(skb->dev);
int err;
IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
- payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h -
- sizeof(struct ipv6hdr);
- mldlen = skb->tail - skb->h.raw;
+ payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
+ mldlen = skb->tail - skb->transport_header;
pip6->payload_len = htons(payload_len);
pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
- IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
+ IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
+ mldlen, 0));
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
mld_dev_queue_xmit);
if (!err) {
@@ -1506,7 +1508,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
pgr->grec_auxwords = 0;
pgr->grec_nsrcs = 0;
pgr->grec_mca = pmc->mca_addr; /* structure copy */
- pmr = (struct mld2_report *)skb->h.raw;
+ pmr = (struct mld2_report *)skb_transport_header(skb);
pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
*ppgr = pgr;
return skb;
@@ -1539,7 +1541,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
if (!*psf_list)
goto empty_source;
- pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
+ pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
/* EX and TO_EX get a fresh packet, if needed */
if (truncate) {
@@ -1791,7 +1793,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- if (ipv6_get_lladdr(dev, &addr_buf)) {
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
@@ -2329,9 +2331,8 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
struct ifmcaddr6 *im = NULL;
struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
- for (state->dev = dev_base, state->idev = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->idev = NULL;
+ for_each_netdev(state->dev) {
struct inet6_dev *idev;
idev = in6_dev_get(state->dev);
if (!idev)
@@ -2358,7 +2359,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
read_unlock_bh(&state->idev->lock);
in6_dev_put(state->idev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->idev = NULL;
break;
@@ -2473,9 +2474,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
struct ifmcaddr6 *im = NULL;
struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
- for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->idev = NULL;
+ state->im = NULL;
+ for_each_netdev(state->dev) {
struct inet6_dev *idev;
idev = in6_dev_get(state->dev);
if (unlikely(idev == NULL))
@@ -2511,7 +2512,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
read_unlock_bh(&state->idev->lock);
in6_dev_put(state->idev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->idev = NULL;
goto out;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0afcabdd8ed6..13b7160fb892 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -90,23 +90,26 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
{
struct ip6_mh *mh;
- if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) ||
- !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3)))
+ if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
+ !pskb_may_pull(skb, (skb_transport_offset(skb) +
+ ((skb_transport_header(skb)[1] + 1) << 3))))
return -1;
- mh = (struct ip6_mh *)skb->h.raw;
+ mh = (struct ip6_mh *)skb_transport_header(skb);
if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
- mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw);
+ mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
+ skb_network_header(skb)));
return -1;
}
if (mh->ip6mh_proto != IPPROTO_NONE) {
LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
mh->ip6mh_proto);
- mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw);
+ mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
+ skb_network_header(skb)));
return -1;
}
@@ -122,12 +125,12 @@ struct mip6_report_rate_limiter {
};
static struct mip6_report_rate_limiter mip6_report_rl = {
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
};
static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
@@ -152,10 +155,10 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
iph = (struct ipv6hdr *)skb->data;
iph->payload_len = htons(skb->len - sizeof(*iph));
- nexthdr = *skb->nh.raw;
- *skb->nh.raw = IPPROTO_DSTOPTS;
+ nexthdr = *skb_network_header(skb);
+ *skb_network_header(skb) = IPPROTO_DSTOPTS;
- dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
+ dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
dstopt->nexthdr = nexthdr;
hao = mip6_padn((char *)(dstopt + 1),
@@ -215,21 +218,22 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
if (likely(opt->dsthao)) {
offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
if (likely(offset >= 0))
- hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset);
+ hao = (struct ipv6_destopt_hao *)
+ (skb_network_header(skb) + offset);
}
skb_get_timestamp(skb, &stamp);
- if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr,
- hao ? &hao->addr : &skb->nh.ipv6h->saddr,
+ if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr,
+ hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
opt->iif))
goto out;
memset(&sel, 0, sizeof(sel));
- memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr,
+ memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
sizeof(sel.daddr));
sel.prefixlen_d = 128;
- memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+ memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
sizeof(sel.saddr));
sel.prefixlen_s = 128;
sel.family = AF_INET6;
@@ -253,11 +257,13 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
- unsigned int packet_len = skb->tail - skb->nh.raw;
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ const unsigned char *nh = skb_network_header(skb);
+ unsigned int packet_len = skb->tail - skb->network_header;
int found_rhdr = 0;
- *nexthdr = &skb->nh.ipv6h->nexthdr;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
while (offset + 1 <= packet_len) {
@@ -288,7 +294,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
offset += ipv6_optlen(exthdr);
*nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
}
return offset;
@@ -361,10 +367,10 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
iph = (struct ipv6hdr *)skb->data;
iph->payload_len = htons(skb->len - sizeof(*iph));
- nexthdr = *skb->nh.raw;
- *skb->nh.raw = IPPROTO_ROUTING;
+ nexthdr = *skb_network_header(skb);
+ *skb_network_header(skb) = IPPROTO_ROUTING;
- rt2 = (struct rt2_hdr *)skb->h.raw;
+ rt2 = (struct rt2_hdr *)skb_transport_header(skb);
rt2->rt_hdr.nexthdr = nexthdr;
rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
@@ -383,11 +389,13 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
- unsigned int packet_len = skb->tail - skb->nh.raw;
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ const unsigned char *nh = skb_network_header(skb);
+ unsigned int packet_len = skb->tail - skb->network_header;
int found_rhdr = 0;
- *nexthdr = &skb->nh.ipv6h->nexthdr;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
while (offset + 1 <= packet_len) {
@@ -397,7 +405,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
case NEXTHDR_ROUTING:
if (offset + 3 <= packet_len) {
struct ipv6_rt_hdr *rt;
- rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
+ rt = (struct ipv6_rt_hdr *)(nh + offset);
if (rt->type != 0)
return offset;
}
@@ -417,7 +425,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
offset += ipv6_optlen(exthdr);
*nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
}
return offset;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 121f31c283f8..d8b36451bada 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -319,6 +319,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
return -EINVAL;
}
+EXPORT_SYMBOL(ndisc_mc_map);
+
static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
{
const u32 *p32 = pkey;
@@ -425,36 +427,23 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
security_sk_classify_flow(ndisc_socket->sk, fl);
}
-static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
- struct in6_addr *daddr, struct in6_addr *solicited_addr,
- int router, int solicited, int override, int inc_opt)
+static void __ndisc_send(struct net_device *dev,
+ struct neighbour *neigh,
+ struct in6_addr *daddr, struct in6_addr *saddr,
+ struct icmp6hdr *icmp6h, struct in6_addr *target,
+ int llinfo, int icmp6_mib_outnd)
{
- struct in6_addr tmpaddr;
- struct inet6_ifaddr *ifp;
- struct inet6_dev *idev;
struct flowi fl;
- struct dst_entry* dst;
+ struct dst_entry *dst;
struct sock *sk = ndisc_socket->sk;
- struct in6_addr *src_addr;
- struct nd_msg *msg;
- int len;
struct sk_buff *skb;
+ struct icmp6hdr *hdr;
+ struct inet6_dev *idev;
+ int len;
int err;
+ u8 *opt;
- len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-
- /* for anycast or proxy, solicited_addr != src_addr */
- ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
- if (ifp) {
- src_addr = solicited_addr;
- in6_ifa_put(ifp);
- } else {
- if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
- return;
- src_addr = &tmpaddr;
- }
-
- ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr,
+ ndisc_flow_init(&fl, icmp6h->icmp6_type, saddr, daddr,
dev->ifindex);
dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
@@ -465,60 +454,57 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
if (err < 0)
return;
- if (inc_opt) {
- if (dev->addr_len)
- len += ndisc_opt_addr_space(dev);
- else
- inc_opt = 0;
- }
+ if (!dev->addr_len)
+ llinfo = 0;
+
+ len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
+ if (llinfo)
+ len += ndisc_opt_addr_space(dev);
skb = sock_alloc_send_skb(sk,
(MAX_HEADER + sizeof(struct ipv6hdr) +
len + LL_RESERVED_SPACE(dev)),
1, &err);
-
- if (skb == NULL) {
+ if (!skb) {
ND_PRINTK0(KERN_ERR
- "ICMPv6 NA: %s() failed to allocate an skb.\n",
+ "ICMPv6 ND: %s() failed to allocate an skb.\n",
__FUNCTION__);
dst_release(dst);
return;
}
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
-
- msg = (struct nd_msg *)skb_put(skb, len);
- skb->h.raw = (unsigned char*)msg;
+ ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
- msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
- msg->icmph.icmp6_code = 0;
- msg->icmph.icmp6_cksum = 0;
+ skb->transport_header = skb->tail;
+ skb_put(skb, len);
- msg->icmph.icmp6_unused = 0;
- msg->icmph.icmp6_router = router;
- msg->icmph.icmp6_solicited = solicited;
- msg->icmph.icmp6_override = override;
+ hdr = (struct icmp6hdr *)skb_transport_header(skb);
+ memcpy(hdr, icmp6h, sizeof(*hdr));
- /* Set the target address. */
- ipv6_addr_copy(&msg->target, solicited_addr);
+ opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
+ if (target) {
+ ipv6_addr_copy((struct in6_addr *)opt, target);
+ opt += sizeof(*target);
+ }
- if (inc_opt)
- ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr,
+ if (llinfo)
+ ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
dev->addr_len, dev->type);
- /* checksum */
- msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len,
- IPPROTO_ICMPV6,
- csum_partial((__u8 *) msg,
- len, 0));
+ hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
+ IPPROTO_ICMPV6,
+ csum_partial((__u8 *) hdr,
+ len, 0));
skb->dst = dst;
+
idev = in6_dev_get(dst->dev);
IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
+
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
if (!err) {
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+ ICMP6_INC_STATS(idev, icmp6_mib_outnd);
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
}
@@ -526,165 +512,95 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
in6_dev_put(idev);
}
+static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+ struct in6_addr *daddr, struct in6_addr *solicited_addr,
+ int router, int solicited, int override, int inc_opt)
+{
+ struct in6_addr tmpaddr;
+ struct inet6_ifaddr *ifp;
+ struct in6_addr *src_addr;
+ struct icmp6hdr icmp6h = {
+ .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+ };
+
+ /* for anycast or proxy, solicited_addr != src_addr */
+ ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
+ if (ifp) {
+ src_addr = solicited_addr;
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ override = 0;
+ in6_ifa_put(ifp);
+ } else {
+ if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
+ return;
+ src_addr = &tmpaddr;
+ }
+
+ icmp6h.icmp6_router = router;
+ icmp6h.icmp6_solicited = solicited;
+ icmp6h.icmp6_override = override;
+
+ __ndisc_send(dev, neigh, daddr, src_addr,
+ &icmp6h, solicited_addr,
+ inc_opt ? ND_OPT_TARGET_LL_ADDR : 0,
+ ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+}
+
void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
struct in6_addr *solicit,
struct in6_addr *daddr, struct in6_addr *saddr)
{
- struct flowi fl;
- struct dst_entry* dst;
- struct inet6_dev *idev;
- struct sock *sk = ndisc_socket->sk;
- struct sk_buff *skb;
- struct nd_msg *msg;
struct in6_addr addr_buf;
- int len;
- int err;
- int send_llinfo;
+ struct icmp6hdr icmp6h = {
+ .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
+ };
if (saddr == NULL) {
- if (ipv6_get_lladdr(dev, &addr_buf))
+ if (ipv6_get_lladdr(dev, &addr_buf,
+ (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
return;
saddr = &addr_buf;
}
- ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr,
- dev->ifindex);
-
- dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
- if (!dst)
- return;
-
- err = xfrm_lookup(&dst, &fl, NULL, 0);
- if (err < 0)
- return;
-
- len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
- send_llinfo = dev->addr_len && !ipv6_addr_any(saddr);
- if (send_llinfo)
- len += ndisc_opt_addr_space(dev);
-
- skb = sock_alloc_send_skb(sk,
- (MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_RESERVED_SPACE(dev)),
- 1, &err);
- if (skb == NULL) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 NA: %s() failed to allocate an skb.\n",
- __FUNCTION__);
- dst_release(dst);
- return;
- }
-
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
- msg = (struct nd_msg *)skb_put(skb, len);
- skb->h.raw = (unsigned char*)msg;
- msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
- msg->icmph.icmp6_code = 0;
- msg->icmph.icmp6_cksum = 0;
- msg->icmph.icmp6_unused = 0;
-
- /* Set the target address. */
- ipv6_addr_copy(&msg->target, solicit);
-
- if (send_llinfo)
- ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
- dev->addr_len, dev->type);
-
- /* checksum */
- msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
- daddr, len,
- IPPROTO_ICMPV6,
- csum_partial((__u8 *) msg,
- len, 0));
- /* send it! */
- skb->dst = dst;
- idev = in6_dev_get(dst->dev);
- IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
- err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
- if (!err) {
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
- }
-
- if (likely(idev != NULL))
- in6_dev_put(idev);
+ __ndisc_send(dev, neigh, daddr, saddr,
+ &icmp6h, solicit,
+ !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0,
+ ICMP6_MIB_OUTNEIGHBORSOLICITS);
}
void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr)
{
- struct flowi fl;
- struct dst_entry* dst;
- struct inet6_dev *idev;
- struct sock *sk = ndisc_socket->sk;
- struct sk_buff *skb;
- struct icmp6hdr *hdr;
- __u8 * opt;
- int len;
- int err;
-
- ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
- dev->ifindex);
-
- dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
- if (!dst)
- return;
-
- err = xfrm_lookup(&dst, &fl, NULL, 0);
- if (err < 0)
- return;
-
- len = sizeof(struct icmp6hdr);
- if (dev->addr_len)
- len += ndisc_opt_addr_space(dev);
-
- skb = sock_alloc_send_skb(sk,
- (MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_RESERVED_SPACE(dev)),
- 1, &err);
- if (skb == NULL) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 RS: %s() failed to allocate an skb.\n",
- __FUNCTION__);
- dst_release(dst);
- return;
- }
-
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
- hdr = (struct icmp6hdr *)skb_put(skb, len);
- skb->h.raw = (unsigned char*)hdr;
- hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
- hdr->icmp6_code = 0;
- hdr->icmp6_cksum = 0;
- hdr->icmp6_unused = 0;
-
- opt = (u8*) (hdr + 1);
-
- if (dev->addr_len)
- ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
- dev->addr_len, dev->type);
-
- /* checksum */
- hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
- IPPROTO_ICMPV6,
- csum_partial((__u8 *) hdr, len, 0));
+ struct icmp6hdr icmp6h = {
+ .icmp6_type = NDISC_ROUTER_SOLICITATION,
+ };
+ int send_sllao = dev->addr_len;
- /* send it! */
- skb->dst = dst;
- idev = in6_dev_get(dst->dev);
- IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
- err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
- if (!err) {
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS);
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ /*
+ * According to section 2.2 of RFC 4429, we must not
+ * send router solicitations with a sllao from
+ * optimistic addresses, but we may send the solicitation
+ * if we don't include the sllao. So here we check
+ * if our address is optimistic, and if so, we
+ * suppress the inclusion of the sllao.
+ */
+ if (send_sllao) {
+ struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1);
+ if (ifp) {
+ if (ifp->flags & IFA_F_OPTIMISTIC) {
+ send_sllao = 0;
+ }
+ in6_ifa_put(ifp);
+ } else {
+ send_sllao = 0;
+ }
}
-
- if (likely(idev != NULL))
- in6_dev_put(idev);
+#endif
+ __ndisc_send(dev, NULL, daddr, saddr,
+ &icmp6h, NULL,
+ send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0,
+ ICMP6_MIB_OUTROUTERSOLICITS);
}
@@ -708,8 +624,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
int probes = atomic_read(&neigh->probes);
- if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1))
- saddr = &skb->nh.ipv6h->saddr;
+ if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1))
+ saddr = &ipv6_hdr(skb)->saddr;
if ((probes -= neigh->parms->ucast_probes) < 0) {
if (!(neigh->nud_state & NUD_VALID)) {
@@ -732,11 +648,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
static void ndisc_recv_ns(struct sk_buff *skb)
{
- struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
- struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
- struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+ struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - msg->opt;
+ u32 ndoptlen = skb->tail - (skb->transport_header +
+ offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
struct inet6_ifaddr *ifp;
@@ -796,28 +713,40 @@ static void ndisc_recv_ns(struct sk_buff *skb)
inc = ipv6_addr_is_multicast(daddr);
if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
- if (ifp->flags & IFA_F_TENTATIVE) {
- /* Address is tentative. If the source
- is unspecified address, it is someone
- does DAD, otherwise we ignore solicitations
- until DAD timer expires.
- */
- if (!dad)
+
+ if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
+ if (dad) {
+ if (dev->type == ARPHRD_IEEE802_TR) {
+ const unsigned char *sadr;
+ sadr = skb_mac_header(skb);
+ if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
+ sadr[9] == dev->dev_addr[1] &&
+ sadr[10] == dev->dev_addr[2] &&
+ sadr[11] == dev->dev_addr[3] &&
+ sadr[12] == dev->dev_addr[4] &&
+ sadr[13] == dev->dev_addr[5]) {
+ /* looped-back to us */
+ goto out;
+ }
+ }
+
+ /*
+ * We are colliding with another node
+ * who is doing DAD
+ * so fail our DAD process
+ */
+ addrconf_dad_failure(ifp);
goto out;
- if (dev->type == ARPHRD_IEEE802_TR) {
- unsigned char *sadr = skb->mac.raw;
- if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
- sadr[9] == dev->dev_addr[1] &&
- sadr[10] == dev->dev_addr[2] &&
- sadr[11] == dev->dev_addr[3] &&
- sadr[12] == dev->dev_addr[4] &&
- sadr[13] == dev->dev_addr[5]) {
- /* looped-back to us */
+ } else {
+ /*
+ * This is not a DAD solicitation.
+ * If we are an optimistic node,
+ * we should respond.
+ * Otherwise, we should ignore it.
+ */
+ if (!(ifp->flags & IFA_F_OPTIMISTIC))
goto out;
- }
}
- addrconf_dad_failure(ifp);
- return;
}
idev = ifp->idev;
@@ -898,11 +827,12 @@ out:
static void ndisc_recv_na(struct sk_buff *skb)
{
- struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
- struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
- struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+ struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - msg->opt;
+ u32 ndoptlen = skb->tail - (skb->transport_header +
+ offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
struct inet6_ifaddr *ifp;
@@ -1000,11 +930,11 @@ out:
static void ndisc_recv_rs(struct sk_buff *skb)
{
- struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw;
+ struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
struct neighbour *neigh;
struct inet6_dev *idev;
- struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
struct ndisc_options ndopts;
u8 *lladdr = NULL;
@@ -1057,7 +987,7 @@ out:
static void ndisc_router_discovery(struct sk_buff *skb)
{
- struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
+ struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
struct neighbour *neigh = NULL;
struct inet6_dev *in6_dev;
struct rt6_info *rt = NULL;
@@ -1068,9 +998,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
__u8 * opt = (__u8 *)(ra_msg + 1);
- optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
+ optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
- if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 RA: source address is not link-local.\n");
return;
@@ -1136,7 +1066,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
- rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+ rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
if (rt)
neigh = rt->rt6i_nexthop;
@@ -1151,7 +1081,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK3(KERN_DEBUG
"ICMPv6 RA: adding default router.\n");
- rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev, pref);
+ rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
if (rt == NULL) {
ND_PRINTK0(KERN_ERR
"ICMPv6 RA: %s() failed to add default route.\n",
@@ -1223,7 +1153,7 @@ skip_defrtr:
*/
if (!neigh)
- neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr,
+ neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
skb->dev, 1);
if (neigh) {
u8 *lladdr = NULL;
@@ -1252,7 +1182,7 @@ skip_defrtr:
if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
- &skb->nh.ipv6h->saddr);
+ &ipv6_hdr(skb)->saddr);
}
}
#endif
@@ -1311,13 +1241,13 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
int optlen;
u8 *lladdr = NULL;
- if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: source address is not link-local.\n");
return;
}
- optlen = skb->tail - skb->h.raw;
+ optlen = skb->tail - skb->transport_header;
optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
if (optlen < 0) {
@@ -1326,7 +1256,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
- icmph = (struct icmp6hdr *) skb->h.raw;
+ icmph = icmp6_hdr(skb);
target = (struct in6_addr *) (icmph + 1);
dest = target + 1;
@@ -1376,8 +1306,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
if (neigh) {
- rt6_redirect(dest, &skb->nh.ipv6h->daddr,
- &skb->nh.ipv6h->saddr, neigh, lladdr,
+ rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr, neigh, lladdr,
on_link);
neigh_release(neigh);
}
@@ -1406,21 +1336,21 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
dev = skb->dev;
- if (ipv6_get_lladdr(dev, &saddr_buf)) {
+ if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: no link-local address on %s\n",
dev->name);
return;
}
- if (!ipv6_addr_equal(&skb->nh.ipv6h->daddr, target) &&
+ if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: target address is not link-local.\n");
return;
}
- ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr,
+ ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr,
dev->ifindex);
dst = ip6_route_output(NULL, &fl);
@@ -1475,11 +1405,12 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
hlen = 0;
skb_reserve(buff, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
+ ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
IPPROTO_ICMPV6, len);
- icmph = (struct icmp6hdr *)skb_put(buff, len);
- buff->h.raw = (unsigned char*)icmph;
+ skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
+ skb_put(buff, len);
+ icmph = icmp6_hdr(buff);
memset(icmph, 0, sizeof(struct icmp6hdr));
icmph->icmp6_type = NDISC_REDIRECT;
@@ -1491,7 +1422,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
addrp = (struct in6_addr *)(icmph + 1);
ipv6_addr_copy(addrp, target);
addrp++;
- ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
opt = (u8*) (addrp + 1);
@@ -1512,9 +1443,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
*(opt++) = (rd_len >> 3);
opt += 6;
- memcpy(opt, skb->nh.ipv6h, rd_len - 8);
+ memcpy(opt, ipv6_hdr(skb), rd_len - 8);
- icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr,
+ icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
len, IPPROTO_ICMPV6,
csum_partial((u8 *) icmph, len, 0));
@@ -1544,14 +1475,14 @@ int ndisc_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb->len))
return 0;
- msg = (struct nd_msg *) skb->h.raw;
+ msg = (struct nd_msg *)skb_transport_header(skb);
- __skb_push(skb, skb->data-skb->h.raw);
+ __skb_push(skb, skb->data - skb_transport_header(skb));
- if (skb->nh.ipv6h->hop_limit != 255) {
+ if (ipv6_hdr(skb)->hop_limit != 255) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 NDISC: invalid hop-limit: %d\n",
- skb->nh.ipv6h->hop_limit);
+ ipv6_hdr(skb)->hop_limit);
return 0;
}
@@ -1584,7 +1515,7 @@ int ndisc_rcv(struct sk_buff *skb)
case NDISC_REDIRECT:
ndisc_redirect_rcv(skb);
break;
- };
+ }
return 0;
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1c405dd30c67..38b149613915 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -11,7 +11,7 @@
int ip6_route_me_harder(struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
struct dst_entry *dst;
struct flowi fl = {
.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
@@ -61,7 +61,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
struct ip6_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP6_LOCAL_OUT) {
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
rt_info->daddr = iph->daddr;
rt_info->saddr = iph->saddr;
@@ -73,7 +73,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
struct ip6_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP6_LOCAL_OUT) {
- struct ipv6hdr *iph = (*pskb)->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(*pskb);
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
return ip6_route_me_harder(*pskb);
@@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
- struct ipv6hdr *ip6h = skb->nh.ipv6h;
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index da07e9a88ee9..bbe99f842f9f 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -198,7 +198,7 @@ config IP6_NF_RAW
and OUTPUT chains.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
endmenu
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index fdb30a5916e5..0004db38af6d 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -11,18 +11,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
- * to adapt it to IPv6
- * HEAVILY based in ipqueue.c by James Morris. It's just
- * a little modified version of it, so he's nearly the
- * real coder of this.
- * Few changes needed, mainly the hard_routing code and
- * the netlink socket protocol (we're NETLINK_IP6_FW).
- * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
- * 2005-02-04: Added /proc counter for dropped packets; fixed so
- * packets aren't delivered to user space if they're going
- * to be dropped.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -189,12 +177,13 @@ ipq_flush(int verdict)
static struct sk_buff *
ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
{
- unsigned char *old_tail;
+ sk_buff_data_t old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
+ struct timeval tv;
read_lock_bh(&queue_lock);
@@ -232,15 +221,16 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
if (!skb)
goto nlmsg_failure;
- old_tail= skb->tail;
+ old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
- pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
- pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
+ tv = ktime_to_timeval(entry->skb->tstamp);
+ pmsg->timestamp_sec = tv.tv_sec;
+ pmsg->timestamp_usec = tv.tv_usec;
pmsg->mark = entry->skb->mark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
@@ -376,7 +366,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
}
if (!skb_make_writable(&e->skb, v->data_len))
return -ENOMEM;
- memcpy(e->skb->data, v->payload, v->data_len);
+ skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
return 0;
@@ -485,7 +475,7 @@ ipq_rcv_skb(struct sk_buff *skb)
if (skblen < sizeof(*nlh))
return;
- nlh = (struct nlmsghdr *)skb->data;
+ nlh = nlmsg_hdr(skb);
nlmsglen = nlh->nlmsg_len;
if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
return;
@@ -667,7 +657,7 @@ static int __init ip6_queue_init(void)
struct proc_dir_entry *proc;
netlink_register_notifier(&ipq_nl_notifier);
- ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk,
+ ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL,
THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7c512e13f956..9aa624026688 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -7,15 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * - increase module usage count as soon as we have rules inside
- * a table
- * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- * - new extension header parser code
- * 15 Oct 2005 Harald Welte <laforge@netfilter.org>
- * - Unification of {ip,ip6}_tables into x_tables
- * - Removed tcp and udp code, since it's not ipv6 specific
*/
#include <linux/capability.h>
@@ -115,7 +106,7 @@ ip6_packet_match(const struct sk_buff *skb,
{
size_t i;
unsigned long ret;
- const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
+ const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
@@ -301,7 +292,7 @@ ip6t_do_table(struct sk_buff **pskb,
goto no_match;
ADD_COUNTER(e->counters,
- ntohs((*pskb)->nh.ipv6h->payload_len)
+ ntohs(ipv6_hdr(*pskb)->payload_len)
+ IPV6_HDR_LEN,
1);
@@ -1448,8 +1439,8 @@ static void __exit ip6_tables_fini(void)
int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
int target, unsigned short *fragoff)
{
- unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
- u8 nexthdr = skb->nh.ipv6h->nexthdr;
+ unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
unsigned int len = skb->len - start;
if (fragoff)
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ccbab66277e3..4115a576ba25 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -32,7 +32,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
if (!skb_make_writable(pskb, (*pskb)->len))
return NF_DROP;
- ip6h = (*pskb)->nh.ipv6h;
+ ip6h = ipv6_hdr(*pskb);
switch (info->mode) {
case IP6T_HL_SET:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index afaa039d0b7b..5bb9cd349350 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -396,8 +396,8 @@ ip6t_log_packet(unsigned int pf,
/* MAC logging for input chain only. */
printk("MAC=");
if (skb->dev && (len = skb->dev->hard_header_len) &&
- skb->mac.raw != skb->nh.raw) {
- unsigned char *p = skb->mac.raw;
+ skb->mac_header != skb->network_header) {
+ const unsigned char *p = skb_mac_header(skb);
int i;
if (skb->dev->type == ARPHRD_SIT &&
@@ -412,7 +412,8 @@ ip6t_log_packet(unsigned int pf,
printk(" ");
if (skb->dev->type == ARPHRD_SIT) {
- struct iphdr *iph = (struct iphdr *)skb->mac.raw;
+ const struct iphdr *iph =
+ (struct iphdr *)skb_mac_header(skb);
printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
NIPQUAD(iph->saddr),
NIPQUAD(iph->daddr));
@@ -421,7 +422,7 @@ ip6t_log_packet(unsigned int pf,
printk(" ");
}
- dump_packet(loginfo, skb, (u8*)skb->nh.ipv6h - skb->data, 1);
+ dump_packet(loginfo, skb, skb_network_offset(skb), 1);
printk("\n");
spin_unlock_bh(&log_lock);
}
@@ -489,14 +490,10 @@ static int __init ip6t_log_init(void)
ret = xt_register_target(&ip6t_log_reg);
if (ret < 0)
return ret;
- if (nf_log_register(PF_INET6, &ip6t_logger) < 0) {
- printk(KERN_WARNING "ip6t_LOG: not logging via system console "
- "since somebody else already registered for PF_INET6\n");
- /* we cannot make module load fail here, since otherwise
- * ip6tables userspace would abort */
- }
-
- return 0;
+ ret = nf_log_register(PF_INET6, &ip6t_logger);
+ if (ret < 0 && ret != -EEXIST)
+ xt_unregister_target(&ip6t_log_reg);
+ return ret;
}
static void __exit ip6t_log_fini(void)
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 6abee94c929f..cb3d2415a064 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -47,7 +47,7 @@ static void send_reset(struct sk_buff *oldskb)
struct tcphdr otcph, *tcph;
unsigned int otcplen, hh_len;
int tcphoff, needs_ack;
- struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h;
+ struct ipv6hdr *oip6h = ipv6_hdr(oldskb), *ip6h;
struct dst_entry *dst = NULL;
u8 proto;
struct flowi fl;
@@ -120,8 +120,9 @@ static void send_reset(struct sk_buff *oldskb)
skb_reserve(nskb, hh_len + dst->header_len);
- ip6h = nskb->nh.ipv6h = (struct ipv6hdr *)
- skb_put(nskb, sizeof(struct ipv6hdr));
+ skb_put(nskb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(nskb);
+ ip6h = ipv6_hdr(nskb);
ip6h->version = 6;
ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
ip6h->nexthdr = IPPROTO_TCP;
@@ -155,8 +156,8 @@ static void send_reset(struct sk_buff *oldskb)
tcph->check = 0;
/* Adjust TCP checksum */
- tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr,
- &nskb->nh.ipv6h->daddr,
+ tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
+ &ipv6_hdr(nskb)->daddr,
sizeof(struct tcphdr), IPPROTO_TCP,
csum_partial((char *)tcph,
sizeof(struct tcphdr), 0));
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 967bed71d4a8..0f3dd932f0a6 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -32,8 +32,8 @@ match(const struct sk_buff *skb,
unsigned char eui64[8];
int i = 0;
- if (!(skb->mac.raw >= skb->head &&
- (skb->mac.raw + ETH_HLEN) <= skb->data) &&
+ if (!(skb_mac_header(skb) >= skb->head &&
+ (skb_mac_header(skb) + ETH_HLEN) <= skb->data) &&
offset != 0) {
*hotdrop = 1;
return 0;
@@ -42,7 +42,7 @@ match(const struct sk_buff *skb,
memset(eui64, 0, sizeof(eui64));
if (eth_hdr(skb)->h_proto == htons(ETH_P_IPV6)) {
- if (skb->nh.ipv6h->version == 0x6) {
+ if (ipv6_hdr(skb)->version == 0x6) {
memcpy(eui64, eth_hdr(skb)->h_source, 3);
memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
eui64[3] = 0xff;
@@ -50,7 +50,7 @@ match(const struct sk_buff *skb,
eui64[0] |= 0x02;
i = 0;
- while ((skb->nh.ipv6h->saddr.s6_addr[8+i] == eui64[i])
+ while ((ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i])
&& (i < 8))
i++;
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 37c8a4d4ed78..d606c0e6d6fd 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -25,7 +25,7 @@ static int match(const struct sk_buff *skb,
int offset, unsigned int protoff, int *hotdrop)
{
const struct ip6t_hl_info *info = matchinfo;
- const struct ipv6hdr *ip6h = skb->nh.ipv6h;
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
switch (info->mode) {
case IP6T_HL_EQ:
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 700a11d25deb..fd6a0869099b 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -45,7 +45,7 @@ ipv6header_match(const struct sk_buff *skb,
/* Make sure this isn't an evil packet */
/* type of the 1st exthdr */
- nexthdr = skb->nh.ipv6h->nexthdr;
+ nexthdr = ipv6_hdr(skb)->nexthdr;
/* pointer to the 1st exthdr */
ptr = sizeof(struct ipv6hdr);
/* available length */
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 112a21d0c6da..76f0cf66f95c 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -102,7 +102,7 @@ ip6t_local_out_hook(unsigned int hook,
#if 0
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ip6t_hook: happy cracking.\n");
return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 0c468d35a937..a9f10e32c163 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -7,8 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
*/
#include <linux/module.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -138,7 +136,7 @@ ip6t_local_hook(unsigned int hook,
#if 0
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ip6t_hook: happy cracking.\n");
return NF_ACCEPT;
@@ -146,21 +144,21 @@ ip6t_local_hook(unsigned int hook,
#endif
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
- memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr));
- memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr));
+ memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr));
mark = (*pskb)->mark;
- hop_limit = (*pskb)->nh.ipv6h->hop_limit;
+ hop_limit = ipv6_hdr(*pskb)->hop_limit;
/* flowlabel and prio (includes version, which shouldn't change either */
- flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
+ flowlabel = *((u_int32_t *)ipv6_hdr(*pskb));
ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
if (ret != NF_DROP && ret != NF_STOLEN
- && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
- || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
+ && (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr))
+ || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr))
|| (*pskb)->mark != mark
- || (*pskb)->nh.ipv6h->hop_limit != hop_limit))
+ || ipv6_hdr(*pskb)->hop_limit != hop_limit))
return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
return ret;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index d1102455668d..6d2a08205111 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -7,17 +7,6 @@
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - support Layer 3 protocol independent connection tracking.
- * Based on the original ip_conntrack code which had the following
- * copyright information:
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - add get_features() to support various size of conntrack
- * structures.
*/
#include <linux/types.h>
@@ -138,16 +127,10 @@ static int
ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
u_int8_t *protonum)
{
- unsigned int extoff;
- unsigned char pnum;
- int protoff;
-
- extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data;
- pnum = (*pskb)->nh.ipv6h->nexthdr;
-
- protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
- (*pskb)->len - extoff);
-
+ unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
+ unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+ int protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
+ (*pskb)->len - extoff);
/*
* (protoff == (*pskb)->len) mean that the packet doesn't have no data
* except of IPv6 & ext headers. but it's tracked anyway. - YK
@@ -179,9 +162,8 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
struct nf_conn_help *help;
enum ip_conntrack_info ctinfo;
unsigned int ret, protoff;
- unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1)
- - (*pskb)->data;
- unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
+ unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
+ unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
/* This is where we call the helper: as the packet goes out. */
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 075da4f287b8..0be790d250f9 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -7,13 +7,6 @@
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - ICMPv6 tracking support. Derived from the original ip_conntrack code
- * net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
- * copyright information:
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*/
#include <linux/types.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 15ab1e3e8b56..347ab7608231 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -82,7 +82,7 @@ struct nf_ct_frag6_queue
struct sk_buff *fragments;
int len;
int meat;
- struct timeval stamp;
+ ktime_t stamp;
unsigned int csum;
__u8 last_in; /* has first/last segment arrived? */
#define COMPLETE 4
@@ -353,9 +353,7 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str
ipv6_addr_copy(&fq->saddr, src);
ipv6_addr_copy(&fq->daddr, dst);
- init_timer(&fq->timer);
- fq->timer.function = nf_ct_frag6_expire;
- fq->timer.data = (long) fq;
+ setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq);
spin_lock_init(&fq->lock);
atomic_set(&fq->refcnt, 1);
@@ -400,19 +398,20 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
}
offset = ntohs(fhdr->frag_off) & ~0x7;
- end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
- ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+ end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
DEBUGP("offset is too large.\n");
return -1;
}
- if (skb->ip_summed == CHECKSUM_COMPLETE)
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
- csum_partial(skb->nh.raw,
- (u8*)(fhdr + 1) - skb->nh.raw,
+ csum_partial(nh, (u8 *)(fhdr + 1) - nh,
0));
+ }
/* Is this the final fragment? */
if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -542,7 +541,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
fq->fragments = skb;
skb->dev = NULL;
- skb_get_timestamp(skb, &fq->stamp);
+ fq->stamp = skb->tstamp;
fq->meat += skb->len;
atomic_add(skb->truesize, &nf_ct_frag6_mem);
@@ -583,7 +582,9 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
/* Unfragmented part is taken from the first segment. */
- payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+ payload_len = ((head->data - skb_network_header(head)) -
+ sizeof(struct ipv6hdr) + fq->len -
+ sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
DEBUGP("payload len is too large.\n");
goto out_oversize;
@@ -624,15 +625,15 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
- head->nh.raw[fq->nhoffset] = head->h.raw[0];
+ skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
memmove(head->head + sizeof(struct frag_hdr), head->head,
(head->data - head->head) - sizeof(struct frag_hdr));
- head->mac.raw += sizeof(struct frag_hdr);
- head->nh.raw += sizeof(struct frag_hdr);
+ head->mac_header += sizeof(struct frag_hdr);
+ head->network_header += sizeof(struct frag_hdr);
skb_shinfo(head)->frag_list = head->next;
- head->h.raw = head->data;
- skb_push(head, head->data - head->nh.raw);
+ skb_reset_transport_header(head);
+ skb_push(head, head->data - skb_network_header(head));
atomic_sub(head->truesize, &nf_ct_frag6_mem);
for (fp=head->next; fp; fp = fp->next) {
@@ -648,12 +649,14 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
head->next = NULL;
head->dev = dev;
- skb_set_timestamp(head, &fq->stamp);
- head->nh.ipv6h->payload_len = htons(payload_len);
+ head->tstamp = fq->stamp;
+ ipv6_hdr(head)->payload_len = htons(payload_len);
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+ head->csum = csum_partial(skb_network_header(head),
+ skb_network_header_len(head),
+ head->csum);
fq->fragments = NULL;
@@ -701,9 +704,10 @@ out_fail:
static int
find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
{
- u8 nexthdr = skb->nh.ipv6h->nexthdr;
- u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
- int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ const int netoff = skb_network_offset(skb);
+ u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
+ int start = netoff + sizeof(struct ipv6hdr);
int len = skb->len - start;
u8 prevhdr = NEXTHDR_IPV6;
@@ -759,7 +763,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
struct sk_buff *ret_skb = NULL;
/* Jumbo payload inhibits frag. header */
- if (skb->nh.ipv6h->payload_len == 0) {
+ if (ipv6_hdr(skb)->payload_len == 0) {
DEBUGP("payload len = 0\n");
return skb;
}
@@ -780,9 +784,9 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
goto ret_orig;
}
- clone->h.raw = clone->data + fhoff;
- hdr = clone->nh.ipv6h;
- fhdr = (struct frag_hdr *)clone->h.raw;
+ skb_set_transport_header(clone, fhoff);
+ hdr = ipv6_hdr(clone);
+ fhdr = (struct frag_hdr *)skb_transport_header(clone);
if (!(fhdr->frag_off & htons(0xFFF9))) {
DEBUGP("Invalid fragment offset\n");
@@ -864,8 +868,7 @@ int nf_ct_frag6_init(void)
nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
- init_timer(&nf_ct_frag6_secret_timer);
- nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
+ setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0);
nf_ct_frag6_secret_timer.expires = jiffies
+ nf_ct_frag6_secret_interval;
add_timer(&nf_ct_frag6_secret_timer);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index fa3fb509f187..920dc9cf6a84 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -23,12 +23,12 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
-#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *proc_net_devsnmp6;
static int fold_prot_inuse(struct proto *proto)
@@ -142,26 +142,13 @@ static struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_SENTINEL
};
-static unsigned long
-fold_field(void *mib[], int offt)
-{
- unsigned long res = 0;
- int i;
-
- for_each_possible_cpu(i) {
- res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
- res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
- }
- return res;
-}
-
static inline void
snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
{
int i;
for (i=0; itemlist[i].name; i++)
seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
- fold_field(mib, itemlist[i].entry));
+ snmp_fold_field(mib, itemlist[i].entry));
}
static int snmp6_seq_show(struct seq_file *seq, void *v)
@@ -236,6 +223,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
return -EINVAL;
remove_proc_entry(idev->stats.proc_dir_entry->name,
proc_net_devsnmp6);
+ idev->stats.proc_dir_entry = NULL;
return 0;
}
@@ -271,47 +259,3 @@ void ipv6_misc_proc_exit(void)
proc_net_remove("snmp6");
}
-#else /* CONFIG_PROC_FS */
-
-
-int snmp6_register_dev(struct inet6_dev *idev)
-{
- return 0;
-}
-
-int snmp6_unregister_dev(struct inet6_dev *idev)
-{
- return 0;
-}
-#endif /* CONFIG_PROC_FS */
-
-int snmp6_alloc_dev(struct inet6_dev *idev)
-{
- int err = -ENOMEM;
-
- if (!idev || !idev->dev)
- return -EINVAL;
-
- if (snmp6_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
- goto err_ip;
- if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
- __alignof__(struct icmpv6_mib)) < 0)
- goto err_icmp;
-
- return 0;
-
-err_icmp:
- snmp6_mib_free((void **)idev->stats.ipv6);
-err_ip:
- return err;
-}
-
-int snmp6_free_dev(struct inet6_dev *idev)
-{
- snmp6_mib_free((void **)idev->stats.icmpv6);
- snmp6_mib_free((void **)idev->stats.ipv6);
- return 0;
-}
-
-
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index ef43bd57baed..f929f47b925e 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -60,6 +60,8 @@ int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
return ret;
}
+EXPORT_SYMBOL(inet6_add_protocol);
+
/*
* Remove a protocol from the hash tables.
*/
@@ -83,3 +85,5 @@ int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
return ret;
}
+
+EXPORT_SYMBOL(inet6_del_protocol);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 306d5d83c068..009a1047fc3f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
int delivered = 0;
__u8 hash;
- saddr = &skb->nh.ipv6h->saddr;
+ saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
hash = nexthdr & (MAX_INET_PROTOS - 1);
@@ -361,17 +361,18 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- skb_postpull_rcsum(skb, skb->nh.raw,
- skb->h.raw - skb->nh.raw);
- if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr,
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+ if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
skb->len, inet->num, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr,
- skb->len, inet->num, 0));
+ if (!skb_csum_unnecessary(skb))
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len,
+ inet->num, 0));
if (inet->hdrincl) {
if (skb_checksum_complete(skb)) {
@@ -420,7 +421,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
msg->msg_flags |= MSG_TRUNC;
}
- if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
+ if (skb_csum_unnecessary(skb)) {
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
} else if (msg->msg_flags&MSG_TRUNC) {
if (__skb_checksum_complete(skb))
@@ -438,7 +439,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
if (sin6) {
sin6->sin6_family = AF_INET6;
sin6->sin6_port = 0;
- ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -488,7 +489,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
goto out;
offset = rp->offset;
- total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data);
+ total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) -
+ skb->data);
if (offset >= total_len - 1) {
err = -EINVAL;
ip6_flush_pending_frames(sk);
@@ -511,7 +513,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
if (csum_skb)
continue;
- len = skb->len - (skb->h.raw - skb->data);
+ len = skb->len - skb_transport_offset(skb);
if (offset >= len) {
offset -= len;
continue;
@@ -523,7 +525,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
skb = csum_skb;
}
- offset += skb->h.raw - skb->data;
+ offset += skb_transport_offset(skb);
if (skb_copy_bits(skb, offset, &csum, 2))
BUG();
@@ -575,11 +577,13 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
skb->priority = sk->sk_priority;
skb->dst = dst_clone(&rt->u.dst);
- skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
+ skb_put(skb, length);
+ skb_reset_network_header(skb);
+ iph = ipv6_hdr(skb);
skb->ip_summed = CHECKSUM_NONE;
- skb->h.raw = skb->nh.raw;
+ skb->transport_header = skb->network_header;
err = memcpy_fromiovecend((void *)iph, from, 0, length);
if (err)
goto error_fault;
@@ -687,9 +691,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
int err;
/* Rough check on arithmetic overflow,
- better check is made in ip6_build_xmit
+ better check is made in ip6_append_data().
*/
- if (len < 0)
+ if (len > INT_MAX)
return -EMSGSIZE;
/* Mirror BSD error message compatibility */
@@ -878,7 +882,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
return 0;
default:
return -ENOPROTOOPT;
- };
+ }
return 0;
}
@@ -903,7 +907,7 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
return 0;
default:
return -ENOPROTOOPT;
- };
+ }
return 0;
}
@@ -957,7 +961,8 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
default:
return ipv6_setsockopt(sk, level, optname, optval,
optlen);
- };
+ }
+
return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
}
@@ -978,7 +983,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
default:
return compat_ipv6_setsockopt(sk, level, optname,
optval, optlen);
- };
+ }
return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
}
#endif
@@ -1031,7 +1036,8 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
default:
return ipv6_getsockopt(sk, level, optname, optval,
optlen);
- };
+ }
+
return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
}
@@ -1052,7 +1058,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
default:
return compat_ipv6_getsockopt(sk, level, optname,
optval, optlen);
- };
+ }
return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
}
#endif
@@ -1073,7 +1079,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb != NULL)
- amount = skb->tail - skb->h.raw;
+ amount = skb->tail - skb->transport_header;
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7034c54e5010..de795c04e34c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -88,7 +88,7 @@ struct frag_queue
int len;
int meat;
int iif;
- struct timeval stamp;
+ ktime_t stamp;
unsigned int csum;
__u8 last_in; /* has first/last segment arrived? */
#define COMPLETE 4
@@ -430,19 +430,24 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
goto err;
offset = ntohs(fhdr->frag_off) & ~0x7;
- end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
- ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+ end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ ((u8 *)&fhdr->frag_off -
+ skb_network_header(skb)));
return;
}
- if (skb->ip_summed == CHECKSUM_COMPLETE)
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
- csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
+ csum_partial(nh, (u8 *)(fhdr + 1) - nh,
+ 0));
+ }
/* Is this the final fragment? */
if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -562,7 +567,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (skb->dev)
fq->iif = skb->dev->ifindex;
skb->dev = NULL;
- skb_get_timestamp(skb, &fq->stamp);
+ fq->stamp = skb->tstamp;
fq->meat += skb->len;
atomic_add(skb->truesize, &ip6_frag_mem);
@@ -605,7 +610,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
BUG_TRAP(FRAG6_CB(head)->offset == 0);
/* Unfragmented part is taken from the first segment. */
- payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+ payload_len = ((head->data - skb_network_header(head)) -
+ sizeof(struct ipv6hdr) + fq->len -
+ sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
@@ -639,15 +646,15 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
nhoff = fq->nhoffset;
- head->nh.raw[nhoff] = head->h.raw[0];
+ skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
memmove(head->head + sizeof(struct frag_hdr), head->head,
(head->data - head->head) - sizeof(struct frag_hdr));
- head->mac.raw += sizeof(struct frag_hdr);
- head->nh.raw += sizeof(struct frag_hdr);
+ head->mac_header += sizeof(struct frag_hdr);
+ head->network_header += sizeof(struct frag_hdr);
skb_shinfo(head)->frag_list = head->next;
- head->h.raw = head->data;
- skb_push(head, head->data - head->nh.raw);
+ skb_reset_transport_header(head);
+ skb_push(head, head->data - skb_network_header(head));
atomic_sub(head->truesize, &ip6_frag_mem);
for (fp=head->next; fp; fp = fp->next) {
@@ -663,15 +670,17 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
head->next = NULL;
head->dev = dev;
- skb_set_timestamp(head, &fq->stamp);
- head->nh.ipv6h->payload_len = htons(payload_len);
+ head->tstamp = fq->stamp;
+ ipv6_hdr(head)->payload_len = htons(payload_len);
IP6CB(head)->nhoff = nhoff;
*skb_in = head;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+ head->csum = csum_partial(skb_network_header(head),
+ skb_network_header_len(head),
+ head->csum);
rcu_read_lock();
IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
@@ -699,33 +708,34 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
struct net_device *dev = skb->dev;
struct frag_hdr *fhdr;
struct frag_queue *fq;
- struct ipv6hdr *hdr;
-
- hdr = skb->nh.ipv6h;
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
if (hdr->payload_len==0) {
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ skb_network_header_len(skb));
return -1;
}
- if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) {
+ if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
+ sizeof(struct frag_hdr)))) {
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ skb_network_header_len(skb));
return -1;
}
- hdr = skb->nh.ipv6h;
- fhdr = (struct frag_hdr *)skb->h.raw;
+ hdr = ipv6_hdr(skb);
+ fhdr = (struct frag_hdr *)skb_transport_header(skb);
if (!(fhdr->frag_off & htons(0xFFF9))) {
/* It is not a fragmented frame */
- skb->h.raw += sizeof(struct frag_hdr);
+ skb->transport_header += sizeof(struct frag_hdr);
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
- IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
+ IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
return 1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0e1f4b2cd3dd..b46ad53044ba 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -308,27 +308,18 @@ static inline void rt6_probe(struct rt6_info *rt)
/*
* Default Router Selection (RFC 2461 6.3.6)
*/
-static int inline rt6_check_dev(struct rt6_info *rt, int oif)
+static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
struct net_device *dev = rt->rt6i_dev;
- int ret = 0;
-
- if (!oif)
+ if (!oif || dev->ifindex == oif)
return 2;
- if (dev->flags & IFF_LOOPBACK) {
- if (!WARN_ON(rt->rt6i_idev == NULL) &&
- rt->rt6i_idev->dev->ifindex == oif)
- ret = 1;
- else
- return 0;
- }
- if (dev->ifindex == oif)
- return 2;
-
- return ret;
+ if ((dev->flags & IFF_LOOPBACK) &&
+ rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
+ return 1;
+ return 0;
}
-static int inline rt6_check_neigh(struct rt6_info *rt)
+static inline int rt6_check_neigh(struct rt6_info *rt)
{
struct neighbour *neigh = rt->rt6i_nexthop;
int m = 0;
@@ -363,55 +354,76 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
return m;
}
-static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
- int strict)
+static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
+ int *mpri, struct rt6_info *match)
{
- struct rt6_info *match = NULL, *last = NULL;
- struct rt6_info *rt, *rt0 = *head;
- u32 metric;
+ int m;
+
+ if (rt6_check_expired(rt))
+ goto out;
+
+ m = rt6_score_route(rt, oif, strict);
+ if (m < 0)
+ goto out;
+
+ if (m > *mpri) {
+ if (strict & RT6_LOOKUP_F_REACHABLE)
+ rt6_probe(match);
+ *mpri = m;
+ match = rt;
+ } else if (strict & RT6_LOOKUP_F_REACHABLE) {
+ rt6_probe(rt);
+ }
+
+out:
+ return match;
+}
+
+static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
+ struct rt6_info *rr_head,
+ u32 metric, int oif, int strict)
+{
+ struct rt6_info *rt, *match;
int mpri = -1;
- RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
- __FUNCTION__, head, head ? *head : NULL, oif);
+ match = NULL;
+ for (rt = rr_head; rt && rt->rt6i_metric == metric;
+ rt = rt->u.dst.rt6_next)
+ match = find_match(rt, oif, strict, &mpri, match);
+ for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
+ rt = rt->u.dst.rt6_next)
+ match = find_match(rt, oif, strict, &mpri, match);
- for (rt = rt0, metric = rt0->rt6i_metric;
- rt && rt->rt6i_metric == metric && (!last || rt != rt0);
- rt = rt->u.dst.rt6_next) {
- int m;
+ return match;
+}
- if (rt6_check_expired(rt))
- continue;
+static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+{
+ struct rt6_info *match, *rt0;
- last = rt;
+ RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
+ __FUNCTION__, fn->leaf, oif);
- m = rt6_score_route(rt, oif, strict);
- if (m < 0)
- continue;
+ rt0 = fn->rr_ptr;
+ if (!rt0)
+ fn->rr_ptr = rt0 = fn->leaf;
- if (m > mpri) {
- if (strict & RT6_LOOKUP_F_REACHABLE)
- rt6_probe(match);
- match = rt;
- mpri = m;
- } else if (strict & RT6_LOOKUP_F_REACHABLE) {
- rt6_probe(rt);
- }
- }
+ match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
if (!match &&
- (strict & RT6_LOOKUP_F_REACHABLE) &&
- last && last != rt0) {
+ (strict & RT6_LOOKUP_F_REACHABLE)) {
+ struct rt6_info *next = rt0->u.dst.rt6_next;
+
/* no entries matched; do round-robin */
- static DEFINE_SPINLOCK(lock);
- spin_lock(&lock);
- *head = rt0->u.dst.rt6_next;
- rt0->u.dst.rt6_next = last->u.dst.rt6_next;
- last->u.dst.rt6_next = rt0;
- spin_unlock(&lock);
+ if (!next || next->rt6i_metric != rt0->rt6i_metric)
+ next = fn->leaf;
+
+ if (next != rt0)
+ fn->rr_ptr = next;
}
- RT6_TRACE("%s() => %p, score=%d\n",
- __FUNCTION__, match, mpri);
+ RT6_TRACE("%s() => %p\n",
+ __FUNCTION__, match);
return (match ? match : &ip6_null_entry);
}
@@ -563,6 +575,8 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
return NULL;
}
+EXPORT_SYMBOL(rt6_lookup);
+
/* ip6_ins_rt is called with FREE table->tb6_lock.
It takes new route entry, the addition fails by any reason the
route is freed. In any case, if caller does not hold it, it may
@@ -657,7 +671,7 @@ restart_2:
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
- rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
+ rt = rt6_select(fn, fl->iif, strict | reachable);
BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
@@ -712,7 +726,7 @@ out2:
void ip6_route_input(struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct flowi fl = {
.iif = skb->dev->ifindex,
@@ -752,7 +766,7 @@ restart_2:
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
- rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
+ rt = rt6_select(fn, fl->oif, strict | reachable);
BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
@@ -817,6 +831,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
}
+EXPORT_SYMBOL(ip6_route_output);
/*
* Destination cache support functions
@@ -1745,7 +1760,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
rtnl_unlock();
return err;
- };
+ }
return -EINVAL;
}
@@ -1754,13 +1769,22 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
* Drop the packet on the floor
*/
-static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
+static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
+ int ipstats_mib_noroutes)
{
- int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
- if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
- IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
-
- IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
+ int type;
+ switch (ipstats_mib_noroutes) {
+ case IPSTATS_MIB_INNOROUTES:
+ type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
+ if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
+ break;
+ }
+ /* FALLTHROUGH */
+ case IPSTATS_MIB_OUTNOROUTES:
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
+ break;
+ }
icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
kfree_skb(skb);
return 0;
@@ -1768,26 +1792,26 @@ static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
static int ip6_pkt_discard(struct sk_buff *skb)
{
- return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
+ return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
skb->dev = skb->dst->dev;
- return ip6_pkt_discard(skb);
+ return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
- return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
+ return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
skb->dev = skb->dst->dev;
- return ip6_pkt_prohibit(skb);
+ return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
static int ip6_pkt_blk_hole(struct sk_buff *skb)
@@ -1991,7 +2015,7 @@ errout:
return err;
}
-int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib6_config cfg;
int err;
@@ -2003,7 +2027,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
return ip6_route_del(&cfg);
}
-int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib6_config cfg;
int err;
@@ -2140,7 +2164,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
prefix, NLM_F_MULTI);
}
-int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
struct nlattr *tb[RTA_MAX+1];
struct rt6_info *rt;
@@ -2194,7 +2218,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
/* Reserve room for dummy headers, this skb can pass
through good chunk of routing engine.
*/
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
@@ -2465,8 +2489,9 @@ ctl_table ipv6_route_table[] = {
void __init ip6_route_init(void)
{
+#ifdef CONFIG_PROC_FS
struct proc_dir_entry *p;
-
+#endif
ip6_dst_ops.kmem_cachep =
kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
@@ -2484,6 +2509,10 @@ void __init ip6_route_init(void)
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
fib6_rules_init();
#endif
+
+ __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
+ __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
+ __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
}
void ip6_route_cleanup(void)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 08d6ed3396e4..1efa95a99f45 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -99,10 +99,10 @@ static struct ip_tunnel * ipip6_tunnel_lookup(__be32 remote, __be32 local)
return NULL;
}
-static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipip6_bucket(struct ip_tunnel_parm *parms)
{
- __be32 remote = t->parms.iph.daddr;
- __be32 local = t->parms.iph.saddr;
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
unsigned h = 0;
int prio = 0;
@@ -117,6 +117,11 @@ static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
return &tunnels[prio][h];
}
+static inline struct ip_tunnel **ipip6_bucket(struct ip_tunnel *t)
+{
+ return __ipip6_bucket(&t->parms);
+}
+
static void ipip6_tunnel_unlink(struct ip_tunnel *t)
{
struct ip_tunnel **tp;
@@ -147,19 +152,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
__be32 local = parms->iph.saddr;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
- unsigned h = 0;
- int prio = 0;
char name[IFNAMSIZ];
- if (remote) {
- prio |= 2;
- h ^= HASH(remote);
- }
- if (local) {
- prio |= 1;
- h ^= HASH(local);
- }
- for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipip6_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
return t;
}
@@ -224,8 +219,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
ICMP in the real Internet is absolutely infeasible.
*/
struct iphdr *iph = (struct iphdr*)skb->data;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
int err;
@@ -280,8 +275,8 @@ out:
struct iphdr *iph = (struct iphdr*)dp;
int hlen = iph->ihl<<2;
struct ipv6hdr *iph6;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
int rel_type = 0;
int rel_code = 0;
int rel_info = 0;
@@ -296,14 +291,14 @@ out:
default:
return;
case ICMP_PARAMETERPROB:
- if (skb->h.icmph->un.gateway < hlen)
+ if (icmp_hdr(skb)->un.gateway < hlen)
return;
/* So... This guy found something strange INSIDE encapsulated
packet. Well, he is fool, but what can we do ?
*/
rel_type = ICMPV6_PARAMPROB;
- rel_info = skb->h.icmph->un.gateway - hlen;
+ rel_info = icmp_hdr(skb)->un.gateway - hlen;
break;
case ICMP_DEST_UNREACH:
@@ -340,7 +335,7 @@ out:
dst_release(skb2->dst);
skb2->dst = NULL;
skb_pull(skb2, skb->data - (u8*)iph6);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
/* Try to guess incoming interface */
rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
@@ -366,7 +361,7 @@ out:
static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
if (INET_ECN_is_ce(iph->tos))
- IP6_ECN_set_ce(skb->nh.ipv6h);
+ IP6_ECN_set_ce(ipv6_hdr(skb));
}
static int ipip6_rcv(struct sk_buff *skb)
@@ -377,13 +372,13 @@ static int ipip6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
read_lock(&ipip6_lock);
if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
secpath_reset(skb);
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
+ skb->mac_header = skb->network_header;
+ skb_reset_network_header(skb);
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
skb->pkt_type = PACKET_HOST;
@@ -430,7 +425,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->stat;
struct iphdr *tiph = &tunnel->parms.iph;
- struct ipv6hdr *iph6 = skb->nh.ipv6h;
+ struct ipv6hdr *iph6 = ipv6_hdr(skb);
u8 tos = tunnel->parms.iph.tos;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
@@ -468,7 +463,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
addr_type = ipv6_addr_type(addr6);
if (addr_type == IPV6_ADDR_ANY) {
- addr6 = &skb->nh.ipv6h->daddr;
+ addr6 = &ipv6_hdr(skb)->daddr;
addr_type = ipv6_addr_type(addr6);
}
@@ -550,11 +545,12 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb_set_owner_w(new_skb, skb->sk);
dev_kfree_skb(skb);
skb = new_skb;
- iph6 = skb->nh.ipv6h;
+ iph6 = ipv6_hdr(skb);
}
- skb->h.raw = skb->nh.raw;
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ skb->transport_header = skb->network_header;
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags = 0;
dst_release(skb->dst);
@@ -564,7 +560,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
* Push down and install the IPIP header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
if (mtu > IPV6_MIN_MTU)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f57a9baa6b27..e2f25ea43b68 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -115,10 +115,10 @@ static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
{
- return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
- skb->nh.ipv6h->saddr.s6_addr32,
- skb->h.th->dest,
- skb->h.th->source);
+ return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -486,7 +486,9 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
struct sk_buff *pktopts = treq->pktopts;
struct inet6_skb_parm *rxopt = IP6CB(pktopts);
if (rxopt->srcrt)
- opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
+ opt = ipv6_invert_rthdr(sk,
+ (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+ rxopt->srcrt));
}
if (opt && opt->srcrt) {
@@ -507,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
skb = tcp_make_synack(sk, dst, req);
if (skb) {
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
th->check = tcp_v6_check(th, skb->len,
&treq->loc_addr, &treq->rmt_addr,
@@ -835,8 +837,8 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
{
__u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
- struct ipv6hdr *ip6h = skb->nh.ipv6h;
- struct tcphdr *th = skb->h.th;
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
int length = (th->doff << 2) - sizeof (*th);
int genhash;
u8 *ptr;
@@ -944,10 +946,11 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
} else {
th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
@@ -964,12 +967,13 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(*th)))
return -EINVAL;
- ipv6h = skb->nh.ipv6h;
- th = skb->h.th;
+ ipv6h = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
th->check = 0;
th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
IPPROTO_TCP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
@@ -977,7 +981,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = skb->h.th, *t1;
+ struct tcphdr *th = tcp_hdr(skb), *t1;
struct sk_buff *buff;
struct flowi fl;
int tot_len = sizeof(*th);
@@ -993,7 +997,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
#ifdef CONFIG_TCP_MD5SIG
if (sk)
- key = tcp_v6_md5_do_lookup(sk, &skb->nh.ipv6h->daddr);
+ key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
else
key = NULL;
@@ -1037,20 +1041,18 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
(TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) |
TCPOLEN_MD5SIG);
- tcp_v6_do_calc_md5_hash((__u8*)&opt[1],
- key,
- &skb->nh.ipv6h->daddr,
- &skb->nh.ipv6h->saddr,
- t1, IPPROTO_TCP,
- tot_len);
+ tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
+ &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr,
+ t1, IPPROTO_TCP, tot_len);
}
#endif
buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
- ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
sizeof(*t1), IPPROTO_TCP,
@@ -1079,7 +1081,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
- struct tcphdr *th = skb->h.th, *t1;
+ struct tcphdr *th = tcp_hdr(skb), *t1;
struct sk_buff *buff;
struct flowi fl;
int tot_len = sizeof(struct tcphdr);
@@ -1091,7 +1093,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
#ifdef CONFIG_TCP_MD5SIG
if (!tw && skb->sk) {
- key = tcp_v6_md5_do_lookup(skb->sk, &skb->nh.ipv6h->daddr);
+ key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr);
} else if (tw && tw->tw_md5_keylen) {
tw_key.key = tw->tw_md5_key;
tw_key.keylen = tw->tw_md5_keylen;
@@ -1140,20 +1142,18 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
if (key) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
- tcp_v6_do_calc_md5_hash((__u8 *)topt,
- key,
- &skb->nh.ipv6h->daddr,
- &skb->nh.ipv6h->saddr,
- t1, IPPROTO_TCP,
- tot_len);
+ tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
+ &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr,
+ t1, IPPROTO_TCP, tot_len);
}
#endif
buff->csum = csum_partial((char *)t1, tot_len, 0);
memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
- ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
tot_len, IPPROTO_TCP,
@@ -1197,18 +1197,18 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
struct request_sock *req, **prev;
- const struct tcphdr *th = skb->h.th;
+ const struct tcphdr *th = tcp_hdr(skb);
struct sock *nsk;
/* Find possible connection requests. */
req = inet6_csk_search_req(sk, &prev, th->source,
- &skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr, inet6_iif(skb));
+ &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, inet6_iif(skb));
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
- th->source, &skb->nh.ipv6h->daddr,
+ nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr,
+ th->source, &ipv6_hdr(skb)->daddr,
ntohs(th->dest), inet6_iif(skb));
if (nsk) {
@@ -1275,9 +1275,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_openreq_init(req, &tmp_opt, skb);
treq = inet6_rsk(req);
- ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
- ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
- TCP_ECN_create_request(req, skb->h.th);
+ ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
+ TCP_ECN_create_request(req, tcp_hdr(skb));
treq->pktopts = NULL;
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -1363,7 +1363,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1389,7 +1389,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
opt == NULL && treq->pktopts) {
struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
if (rxopt->srcrt)
- opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
+ opt = ipv6_invert_rthdr(sk,
+ (struct ipv6_rt_hdr *)(skb_network_header(treq->pktopts) +
+ rxopt->srcrt));
}
if (dst == NULL) {
@@ -1453,6 +1455,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
First: no IPv4 options.
*/
newinet->opt = NULL;
+ newnp->ipv6_fl_list = NULL;
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;
@@ -1468,7 +1471,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
/* Clone native IPv6 options from listening socket (if any)
@@ -1527,15 +1530,16 @@ out:
static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr,skb->csum)) {
+ if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
return 0;
}
}
- skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr, 0));
+ skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
+ &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, 0));
if (skb->len <= 76) {
return __skb_checksum_complete(skb);
@@ -1599,7 +1603,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+ if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
TCP_CHECK_TIMER(sk);
if (opt_skb)
@@ -1607,7 +1611,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
+ if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
@@ -1630,7 +1634,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
}
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+ if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
TCP_CHECK_TIMER(sk);
if (opt_skb)
@@ -1663,7 +1667,7 @@ ipv6_pktoptions:
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
np->mcast_oif = inet6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
- np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
+ np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
if (ipv6_opt_accepted(sk, opt_skb)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1696,28 +1700,27 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
- th = skb->h.th;
+ th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr)/4)
goto bad_packet;
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
- if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
- tcp_v6_checksum_init(skb)))
+ if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
goto bad_packet;
- th = skb->h.th;
+ th = tcp_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
- TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
+ TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
- &skb->nh.ipv6h->daddr, ntohs(th->dest),
+ sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source,
+ &ipv6_hdr(skb)->daddr, ntohs(th->dest),
inet6_iif(skb));
if (!sk)
@@ -1797,7 +1800,7 @@ do_time_wait:
struct sock *sk2;
sk2 = inet6_lookup_listener(&tcp_hashinfo,
- &skb->nh.ipv6h->daddr,
+ &ipv6_hdr(skb)->daddr,
ntohs(th->dest), inet6_iif(skb));
if (sk2 != NULL) {
struct inet_timewait_sock *tw = inet_twsk(sk);
@@ -1944,6 +1947,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
return inet6_destroy_sock(sk);
}
+#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
struct sock *sk, struct request_sock *req, int i, int uid)
@@ -2060,7 +2064,6 @@ static void get_timewait6_sock(struct seq_file *seq,
atomic_read(&tw->tw_refcnt), tw);
}
-#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0ad471909881..b083c09e3d2d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -93,10 +93,10 @@ static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport,
continue;
score++;
}
- if(score == 4) {
+ if (score == 4) {
result = sk;
break;
- } else if(score > badness) {
+ } else if (score > badness) {
result = sk;
badness = score;
}
@@ -120,8 +120,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
- size_t copied;
- int err, copy_only, is_udplite = IS_UDPLITE(sk);
+ unsigned int ulen, copied;
+ int err;
+ int is_udplite = IS_UDPLITE(sk);
if (addr_len)
*addr_len=sizeof(struct sockaddr_in6);
@@ -134,24 +135,25 @@ try_again:
if (!skb)
goto out;
- copied = skb->len - sizeof(struct udphdr);
- if (copied > len) {
- copied = len;
+ ulen = skb->len - sizeof(struct udphdr);
+ copied = len;
+ if (copied > ulen)
+ copied = ulen;
+ else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
- }
/*
- * Decide whether to checksum and/or copy data.
+ * If checksum is needed at all, try to do it while copying the
+ * data. If the data is truncated, or if we only want a partial
+ * coverage checksum (UDP-Lite), do it before the copy.
*/
- copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
- if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
- if (__udp_lib_checksum_complete(skb))
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
- copy_only = 1;
}
- if (copy_only)
+ if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov, copied );
else {
@@ -170,15 +172,16 @@ try_again:
sin6 = (struct sockaddr_in6 *) msg->msg_name;
sin6->sin6_family = AF_INET6;
- sin6->sin6_port = skb->h.uh->source;
+ sin6->sin6_port = udp_hdr(skb)->source;
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
if (skb->protocol == htons(ETH_P_IP))
ipv6_addr_set(&sin6->sin6_addr, 0, 0,
- htonl(0xffff), skb->nh.iph->saddr);
+ htonl(0xffff), ip_hdr(skb)->saddr);
else {
- ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&sin6->sin6_addr,
+ &ipv6_hdr(skb)->saddr);
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6->sin6_scope_id = IP6CB(skb)->iif;
}
@@ -194,7 +197,7 @@ try_again:
err = copied;
if (flags & MSG_TRUNC)
- err = skb->len - sizeof(struct udphdr);
+ err = ulen;
out_free:
skb_free_datagram(sk, skb);
@@ -279,8 +282,10 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
}
}
- if (udp_lib_checksum_complete(skb))
- goto drop;
+ if (sk->sk_filter) {
+ if (udp_lib_checksum_complete(skb))
+ goto drop;
+ }
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
@@ -325,7 +330,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
continue;
}
- if(!inet6_mc_check(s, loc_addr, rmt_addr))
+ if (!inet6_mc_check(s, loc_addr, rmt_addr))
continue;
return s;
}
@@ -341,7 +346,7 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
struct in6_addr *daddr, struct hlist_head udptable[])
{
struct sock *sk, *sk2;
- const struct udphdr *uh = skb->h.uh;
+ const struct udphdr *uh = udp_hdr(skb);
int dif;
read_lock(&udp_hash_lock);
@@ -366,9 +371,20 @@ out:
return 0;
}
-static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
-
+static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
+ int proto)
{
+ int err;
+
+ UDP_SKB_CB(skb)->partial_cov = 0;
+ UDP_SKB_CB(skb)->cscov = skb->len;
+
+ if (proto == IPPROTO_UDPLITE) {
+ err = udplite_checksum_init(skb, uh);
+ if (err)
+ return err;
+ }
+
if (uh->check == 0) {
/* RFC 2460 section 8.1 says that we SHOULD log
this error. Well, it is reasonable.
@@ -377,21 +393,20 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
return 1;
}
if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
- skb->len, IPPROTO_UDP, skb->csum ))
+ !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ skb->len, proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr,
- skb->len, IPPROTO_UDP,
- 0));
+ if (!skb_csum_unnecessary(skb))
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len, proto, 0));
- return (UDP_SKB_CB(skb)->partial_cov = 0);
+ return 0;
}
int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
- int is_udplite)
+ int proto)
{
struct sk_buff *skb = *pskb;
struct sock *sk;
@@ -403,15 +418,16 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto short_packet;
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
- uh = skb->h.uh;
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
+ uh = udp_hdr(skb);
ulen = ntohs(uh->len);
if (ulen > skb->len)
goto short_packet;
- if(! is_udplite ) { /* UDP validates ulen. */
+ if (proto == IPPROTO_UDP) {
+ /* UDP validates ulen. */
/* Check for jumbo payload */
if (ulen == 0)
@@ -423,19 +439,15 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
if (ulen < skb->len) {
if (pskb_trim_rcsum(skb, ulen))
goto short_packet;
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
- uh = skb->h.uh;
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
+ uh = udp_hdr(skb);
}
-
- if (udp6_csum_init(skb, uh))
- goto discard;
-
- } else { /* UDP-Lite validates cscov. */
- if (udplite6_csum_init(skb, uh))
- goto discard;
}
+ if (udp6_csum_init(skb, uh, proto))
+ goto discard;
+
/*
* Multicast receive code
*/
@@ -457,33 +469,34 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
if (udp_lib_checksum_complete(skb))
goto discard;
- UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
+ UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
kfree_skb(skb);
- return(0);
+ return 0;
}
/* deliver */
udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
- return(0);
+ return 0;
short_packet:
LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
- is_udplite? "-Lite" : "", ulen, skb->len);
+ proto == IPPROTO_UDPLITE ? "-Lite" : "",
+ ulen, skb->len);
discard:
- UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+ UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
- return(0);
+ return 0;
}
static __inline__ int udpv6_rcv(struct sk_buff **pskb)
{
- return __udp6_lib_rcv(pskb, udp_hash, 0);
+ return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP);
}
/*
@@ -521,7 +534,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
/*
* Create a UDP header
*/
- uh = skb->h.uh;
+ uh = udp_hdr(skb);
uh->source = fl->fl_ip_sport;
uh->dest = fl->fl_ip_dport;
uh->len = htons(up->len);
@@ -615,7 +628,7 @@ do_udp_sendmsg:
return udp_sendmsg(iocb, sk, msg, len);
/* Rough check on arithmetic overflow,
- better check is made in ip6_build_xmit
+ better check is made in ip6_append_data().
*/
if (len > INT_MAX - sizeof(struct udphdr))
return -EMSGSIZE;
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 629f97162fbc..f54016a55004 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -19,7 +19,7 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
static int udplitev6_rcv(struct sk_buff **pskb)
{
- return __udp6_lib_rcv(pskb, udplite_hash, 1);
+ return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE);
}
static void udplitev6_err(struct sk_buff *skb,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 31f651f95096..d7ed8aa56ec1 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -28,14 +28,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
unsigned int nhoff;
nhoff = IP6CB(skb)->nhoff;
- nexthdr = skb->nh.raw[nhoff];
+ nexthdr = skb_network_header(skb)[nhoff];
seq = 0;
if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
goto drop;
do {
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
if (xfrm_nr == XFRM_MAX_DEPTH)
goto drop;
@@ -58,7 +58,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
if (nexthdr <= 0)
goto drop_unlock;
- skb->nh.raw[nhoff] = nexthdr;
+ skb_network_header(skb)[nhoff] = nexthdr;
if (x->props.replay_window)
xfrm_replay_advance(x, seq);
@@ -112,8 +112,8 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
return -1;
} else {
#ifdef CONFIG_NETFILTER
- skb->nh.ipv6h->payload_len = htons(skb->len);
- __skb_push(skb, skb->data - skb->nh.raw);
+ ipv6_hdr(skb)->payload_len = htons(skb->len);
+ __skb_push(skb, skb->data - skb_network_header(skb));
NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
ip6_rcv_finish);
@@ -140,19 +140,19 @@ int xfrm6_rcv(struct sk_buff **pskb)
return xfrm6_rcv_spi(*pskb, 0);
}
+EXPORT_SYMBOL(xfrm6_rcv);
+
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto)
{
struct xfrm_state *x = NULL;
int wildcard = 0;
- struct in6_addr any;
xfrm_address_t *xany;
struct xfrm_state *xfrm_vec_one = NULL;
int nh = 0;
int i = 0;
- ipv6_addr_set(&any, 0, 0, 0, 0);
- xany = (xfrm_address_t *)&any;
+ xany = (xfrm_address_t *)&in6addr_any;
for (i = 0; i < 3; i++) {
xfrm_address_t *dst, *src;
@@ -247,3 +247,5 @@ drop:
xfrm_state_put(xfrm_vec_one);
return -1;
}
+
+EXPORT_SYMBOL(xfrm6_input_addr);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index edcfffa9e87b..2e61d6ddece3 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -38,17 +38,18 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
skb_push(skb, x->props.header_len);
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
- skb->nh.raw = prevhdr - x->props.header_len;
- skb->h.raw = skb->data + hdr_len;
+ skb_set_network_header(skb,
+ (prevhdr - x->props.header_len) - skb->data);
+ skb_set_transport_header(skb, hdr_len);
memmove(skb->data, iph, hdr_len);
- skb->nh.raw = skb->data;
- top_iph = skb->nh.ipv6h;
- skb->nh.raw = &top_iph->nexthdr;
- skb->h.ipv6h = top_iph + 1;
+ skb_reset_network_header(skb);
+ top_iph = ipv6_hdr(skb);
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+ skb->network_header += offsetof(struct ipv6hdr, nexthdr);
ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
@@ -59,6 +60,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ipv6hdr *ip6h;
+ const unsigned char *old_mac;
int size = sizeof(struct ipv6hdr);
int err = -EINVAL;
@@ -66,13 +68,14 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
goto out;
skb_push(skb, size);
- memmove(skb->data, skb->nh.raw, size);
- skb->nh.raw = skb->data;
+ memmove(skb->data, skb_network_header(skb), size);
+ skb_reset_network_header(skb);
- skb->mac.raw = memmove(skb->data - skb->mac_len,
- skb->mac.raw, skb->mac_len);
+ old_mac = skb_mac_header(skb);
+ skb_set_mac_header(skb, -skb->mac_len);
+ memmove(skb_mac_header(skb), old_mac, skb->mac_len);
- ip6h = skb->nh.ipv6h;
+ ip6h = ipv6_hdr(skb);
ip6h->payload_len = htons(skb->len - size);
ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 6031c16d46ca..6ad6d7ac6bd7 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -50,11 +50,12 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
skb_push(skb, x->props.header_len);
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
- skb->nh.raw = prevhdr - x->props.header_len;
- skb->h.raw = skb->data + hdr_len;
+ skb_set_network_header(skb,
+ (prevhdr - x->props.header_len) - skb->data);
+ skb_set_transport_header(skb, hdr_len);
memmove(skb->data, iph, hdr_len);
return 0;
}
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 3a4b39b12bad..c026bfea820a 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -32,11 +32,12 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
skb_push(skb, x->props.header_len);
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
- skb->nh.raw = prevhdr - x->props.header_len;
- skb->h.raw = skb->data + hdr_len;
+ skb_set_network_header(skb,
+ (prevhdr - x->props.header_len) - skb->data);
+ skb_set_transport_header(skb, hdr_len);
memmove(skb->data, iph, hdr_len);
return 0;
}
@@ -51,13 +52,16 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
- int ihl = skb->data - skb->h.raw;
+ int ihl = skb->data - skb_transport_header(skb);
- if (skb->h.raw != skb->nh.raw)
- skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
- skb->nh.ipv6h->payload_len = htons(skb->len + ihl -
+ if (skb->transport_header != skb->network_header) {
+ memmove(skb_transport_header(skb),
+ skb_network_header(skb), ihl);
+ skb->network_header = skb->transport_header;
+ }
+ ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
sizeof(struct ipv6hdr));
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
return 0;
}
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 0bc866c0d83c..a6c0cdf46ad6 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,8 +18,8 @@
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
- struct ipv6hdr *outer_iph = skb->nh.ipv6h;
- struct ipv6hdr *inner_iph = skb->h.ipv6h;
+ struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+ struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
IP6_ECN_set_ce(inner_iph);
@@ -27,8 +27,8 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
{
- if (INET_ECN_is_ce(ipv6_get_dsfield(skb->nh.ipv6h)))
- IP_ECN_set_ce(skb->h.ipiph);
+ if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb))))
+ IP_ECN_set_ce(ipip_hdr(skb));
}
/* Add encapsulation header.
@@ -51,12 +51,12 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
int dsfield;
skb_push(skb, x->props.header_len);
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
- skb->nh.raw = skb->data;
- top_iph = skb->nh.ipv6h;
- skb->nh.raw = &top_iph->nexthdr;
- skb->h.ipv6h = top_iph + 1;
+ skb_reset_network_header(skb);
+ top_iph = ipv6_hdr(skb);
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+ skb->network_header += offsetof(struct ipv6hdr, nexthdr);
top_iph->version = 6;
if (xdst->route->ops->family == AF_INET6) {
@@ -86,9 +86,11 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
+ const unsigned char *old_mac;
+ const unsigned char *nh = skb_network_header(skb);
- if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6
- && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
+ if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 &&
+ nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
goto out;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
@@ -97,9 +99,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
(err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
goto out;
- if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
+ nh = skb_network_header(skb);
+ if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
+ ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb));
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip6_ecn_decapsulate(skb);
} else {
@@ -107,9 +110,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
ip6ip_ecn_decapsulate(skb);
skb->protocol = htons(ETH_P_IP);
}
- skb->mac.raw = memmove(skb->data - skb->mac_len,
- skb->mac.raw, skb->mac_len);
- skb->nh.raw = skb->data;
+ old_mac = skb_mac_header(skb);
+ skb_set_mac_header(skb, -skb->mac_len);
+ memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+ skb_reset_network_header(skb);
err = 0;
out:
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d6d786b89d2b..56364a5f676a 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -23,6 +23,8 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
return ip6_find_1stfragopt(skb, prevhdr);
}
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
+
static int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
int mtu, ret = 0;
@@ -76,11 +78,11 @@ static int xfrm6_output_one(struct sk_buff *skb)
x->curlft.bytes += skb->len;
x->curlft.packets++;
if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
- x->lastused = (u64)xtime.tv_sec;
+ x->lastused = get_seconds();
spin_unlock_bh(&x->lock);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
if (!(skb->dst = dst_pop(dst))) {
err = -EHOSTUNREACH;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d8a585bd2cb4..1faa2ea80afc 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -240,7 +240,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
if (!afinfo) {
dst = *dst_p;
goto error;
- };
+ }
+
dst_prev->output = afinfo->output;
xfrm_state_put_afinfo(afinfo);
/* Sheit... I remember I did this right. Apparently,
@@ -270,17 +271,19 @@ error:
static inline void
_decode_session6(struct sk_buff *skb, struct flowi *fl)
{
- u16 offset = skb->h.raw - skb->nh.raw;
- struct ipv6hdr *hdr = skb->nh.ipv6h;
+ u16 offset = skb_network_header_len(skb);
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
struct ipv6_opt_hdr *exthdr;
- u8 nexthdr = skb->nh.raw[IP6CB(skb)->nhoff];
+ const unsigned char *nh = skb_network_header(skb);
+ u8 nexthdr = nh[IP6CB(skb)->nhoff];
memset(fl, 0, sizeof(struct flowi));
ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
- while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+ nh = skb_network_header(skb);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
switch (nexthdr) {
case NEXTHDR_ROUTING:
@@ -288,7 +291,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
case NEXTHDR_DEST:
offset += ipv6_optlen(exthdr);
nexthdr = exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
break;
case IPPROTO_UDP:
@@ -296,7 +299,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
case IPPROTO_TCP:
case IPPROTO_SCTP:
case IPPROTO_DCCP:
- if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
+ if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) {
__be16 *ports = (__be16 *)exthdr;
fl->fl_ip_sport = ports[0];
@@ -306,7 +309,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
return;
case IPPROTO_ICMPV6:
- if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
+ if (pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
u8 *icmp = (u8 *)exthdr;
fl->fl_icmp_type = icmp[0];
@@ -317,7 +320,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
#ifdef CONFIG_IPV6_MIP6
case IPPROTO_MH:
- if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) {
+ if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
struct ip6_mh *mh;
mh = (struct ip6_mh *)exthdr;
@@ -335,7 +338,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
fl->fl_ipsec_spi = 0;
fl->proto = nexthdr;
return;
- };
+ }
}
}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index ee4b84a33ff4..5502cc948dfb 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -58,7 +58,7 @@ static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly;
static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE];
static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE];
-static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
+static inline unsigned xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
{
unsigned h;
@@ -70,7 +70,7 @@ static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
return h;
}
-static unsigned inline xfrm6_tunnel_spi_hash_byspi(u32 spi)
+static inline unsigned xfrm6_tunnel_spi_hash_byspi(u32 spi)
{
return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE;
}
@@ -257,11 +257,11 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_tunnel_rcv(struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
__be32 spi;
spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
- return xfrm6_rcv_spi(skb, spi);
+ return xfrm6_rcv_spi(skb, spi) > 0 ? : 0;
}
static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index cac35a77f069..15419dd682fd 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -576,7 +576,9 @@ static struct sk_buff *ipxitf_adjust_skbuff(struct ipx_interface *intrfc,
skb2 = alloc_skb(len, GFP_ATOMIC);
if (skb2) {
skb_reserve(skb2, out_offset);
- skb2->nh.raw = skb2->h.raw = skb_put(skb2, skb->len);
+ skb_reset_network_header(skb2);
+ skb_reset_transport_header(skb2);
+ skb_put(skb2, skb->len);
memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len);
memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
}
@@ -1807,8 +1809,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
copied);
if (rc)
goto out_free;
- if (skb->tstamp.off_sec)
- skb_get_timestamp(skb, &sk->sk_stamp);
+ if (skb->tstamp.tv64)
+ sk->sk_stamp = skb->tstamp;
msg->msg_namelen = sizeof(*sipx);
@@ -1959,7 +1961,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = {
.sendpage = sock_no_sendpage,
};
-#include <linux/smp_lock.h>
SOCKOPS_WRAP(ipx_dgram, PF_IPX);
static struct packet_type ipx_8023_packet_type = {
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index 8e1cad971f11..e16c11423527 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -203,7 +203,9 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
skb->sk = sk;
/* Fill in IPX header */
- skb->h.raw = skb->nh.raw = skb_put(skb, sizeof(struct ipxhdr));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_put(skb, sizeof(struct ipxhdr));
ipx = ipx_hdr(skb);
ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr));
IPX_SKB_CB(skb)->ipx_tctrl = 0;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index eabd6838f50a..dcd7e325b283 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -89,7 +89,6 @@ static int irda_data_indication(void *instance, void *sap, struct sk_buff *skb)
self = instance;
sk = instance;
- IRDA_ASSERT(sk != NULL, return -1;);
err = sock_queue_rcv_skb(sk, skb);
if (err) {
@@ -131,15 +130,12 @@ static void irda_disconnect_indication(void *instance, void *sap,
}
/* Prevent race conditions with irda_release() and irda_shutdown() */
+ bh_lock_sock(sk);
if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) {
- lock_sock(sk);
sk->sk_state = TCP_CLOSE;
- sk->sk_err = ECONNRESET;
sk->sk_shutdown |= SEND_SHUTDOWN;
sk->sk_state_change(sk);
- sock_orphan(sk);
- release_sock(sk);
/* Close our TSAP.
* If we leave it open, IrLMP put it back into the list of
@@ -159,6 +155,7 @@ static void irda_disconnect_indication(void *instance, void *sap,
self->tsap = NULL;
}
}
+ bh_unlock_sock(sk);
/* Note : once we are there, there is not much you want to do
* with the socket anymore, apart from closing it.
@@ -221,7 +218,7 @@ static void irda_connect_confirm(void *instance, void *sap,
break;
default:
self->max_data_size = irttp_get_max_seg_size(self->tsap);
- };
+ }
IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
self->max_data_size);
@@ -284,7 +281,7 @@ static void irda_connect_indication(void *instance, void *sap,
break;
default:
self->max_data_size = irttp_get_max_seg_size(self->tsap);
- };
+ }
IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
self->max_data_size);
@@ -307,8 +304,6 @@ static void irda_connect_response(struct irda_sock *self)
IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
- IRDA_ASSERT(self != NULL, return;);
-
skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
GFP_ATOMIC);
if (skb == NULL) {
@@ -338,7 +333,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
self = instance;
sk = instance;
- IRDA_ASSERT(sk != NULL, return;);
+ BUG_ON(sk == NULL);
switch (flow) {
case FLOW_STOP:
@@ -450,7 +445,7 @@ static void irda_discovery_timeout(u_long priv)
IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
self = (struct irda_sock *) priv;
- IRDA_ASSERT(self != NULL, return;);
+ BUG_ON(self == NULL);
/* Nothing for the caller */
self->cachelog = NULL;
@@ -547,8 +542,6 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name)
{
IRDA_DEBUG(2, "%s(%p, %s)\n", __FUNCTION__, self, name);
- IRDA_ASSERT(self != NULL, return -1;);
-
if (self->iriap) {
IRDA_WARNING("%s(): busy with a previous query\n",
__FUNCTION__);
@@ -636,8 +629,6 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
IRDA_DEBUG(2, "%s(), name=%s\n", __FUNCTION__, name);
- IRDA_ASSERT(self != NULL, return -1;);
-
/* Ask lmp for the current discovery log
* Note : we have to use irlmp_get_discoveries(), as opposed
* to play with the cachelog directly, because while we are
@@ -785,8 +776,6 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct irda_sock *self = irda_sk(sk);
int err;
- IRDA_ASSERT(self != NULL, return -1;);
-
IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
if (addr_len != sizeof(struct sockaddr_irda))
@@ -842,8 +831,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
- IRDA_ASSERT(self != NULL, return -1;);
-
err = irda_create(newsock, sk->sk_protocol);
if (err)
return err;
@@ -874,44 +861,28 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
* calling us, the data is waiting for us ;-)
* Jean II
*/
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb == NULL) {
- int ret = 0;
- DECLARE_WAITQUEUE(waitq, current);
+ while (1) {
+ skb = skb_dequeue(&sk->sk_receive_queue);
+ if (skb)
+ break;
/* Non blocking operation */
if (flags & O_NONBLOCK)
return -EWOULDBLOCK;
- /* The following code is a cut'n'paste of the
- * wait_event_interruptible() macro.
- * We don't us the macro because the condition has
- * side effects : we want to make sure that only one
- * skb get dequeued - Jean II */
- add_wait_queue(sk->sk_sleep, &waitq);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb != NULL)
- break;
- if (!signal_pending(current)) {
- schedule();
- continue;
- }
- ret = -ERESTARTSYS;
- break;
- }
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &waitq);
- if(ret)
- return -ERESTARTSYS;
+ err = wait_event_interruptible(*(sk->sk_sleep),
+ skb_peek(&sk->sk_receive_queue));
+ if (err)
+ return err;
}
newsk = newsock->sk;
+ if (newsk == NULL)
+ return -EIO;
+
newsk->sk_state = TCP_ESTABLISHED;
new = irda_sk(newsk);
- IRDA_ASSERT(new != NULL, return -1;);
/* Now attach up the new socket */
new->tsap = irttp_dup(self->tsap, new);
@@ -1062,7 +1033,8 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_state != TCP_ESTABLISHED) {
sock->state = SS_UNCONNECTED;
- return sock_error(sk); /* Always set at this point */
+ err = sock_error(sk);
+ return err? err : -ECONNRESET;
}
sock->state = SS_CONNECTED;
@@ -1172,8 +1144,6 @@ static void irda_destroy_socket(struct irda_sock *self)
{
IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
- IRDA_ASSERT(self != NULL, return;);
-
/* Unregister with IrLMP */
irlmp_unregister_client(self->ckey);
irlmp_unregister_service(self->skey);
@@ -1275,7 +1245,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
struct sock *sk = sock->sk;
struct irda_sock *self;
struct sk_buff *skb;
- unsigned char *asmptr;
int err;
IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1293,7 +1262,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
return -ENOTCONN;
self = irda_sk(sk);
- IRDA_ASSERT(self != NULL, return -1;);
/* Check if IrTTP is wants us to slow down */
@@ -1318,9 +1286,9 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
return -ENOBUFS;
skb_reserve(skb, self->max_header_size + 16);
-
- asmptr = skb->h.raw = skb_put(skb, len);
- err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ skb_reset_transport_header(skb);
+ skb_put(skb, len);
+ err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (err) {
kfree_skb(skb);
return err;
@@ -1356,16 +1324,16 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(!sock_error(sk), return -1;);
+ if ((err = sock_error(sk)) < 0)
+ return err;
skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &err);
if (!skb)
return err;
- skb->h.raw = skb->data;
- copied = skb->len;
+ skb_reset_transport_header(skb);
+ copied = skb->len;
if (copied > size) {
IRDA_DEBUG(2, "%s(), Received truncated frame (%zd < %zd)!\n",
@@ -1404,13 +1372,13 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
struct irda_sock *self = irda_sk(sk);
int noblock = flags & MSG_DONTWAIT;
size_t copied = 0;
- int target = 1;
- DECLARE_WAITQUEUE(waitq, current);
+ int target, err;
+ long timeo;
IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(!sock_error(sk), return -1;);
+ if ((err = sock_error(sk)) < 0)
+ return err;
if (sock->flags & __SO_ACCEPTCON)
return(-EINVAL);
@@ -1418,8 +1386,8 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- if (flags & MSG_WAITALL)
- target = size;
+ target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+ timeo = sock_rcvtimeo(sk, noblock);
msg->msg_namelen = 0;
@@ -1427,42 +1395,37 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
int chunk;
struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb==NULL) {
+ if (skb == NULL) {
+ DEFINE_WAIT(wait);
int ret = 0;
if (copied >= target)
break;
- /* The following code is a cut'n'paste of the
- * wait_event_interruptible() macro.
- * We don't us the macro because the test condition
- * is messy. - Jean II */
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
- add_wait_queue(sk->sk_sleep, &waitq);
- set_current_state(TASK_INTERRUPTIBLE);
+ prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
/*
* POSIX 1003.1g mandates this order.
*/
ret = sock_error(sk);
if (ret)
- break;
+ ;
else if (sk->sk_shutdown & RCV_SHUTDOWN)
;
else if (noblock)
ret = -EAGAIN;
else if (signal_pending(current))
- ret = -ERESTARTSYS;
+ ret = sock_intr_errno(timeo);
+ else if (sk->sk_state != TCP_ESTABLISHED)
+ ret = -ENOTCONN;
else if (skb_peek(&sk->sk_receive_queue) == NULL)
/* Wait process until data arrives */
schedule();
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &waitq);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ finish_wait(sk->sk_sleep, &wait);
- if(ret)
- return(ret);
+ if (ret)
+ return ret;
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
@@ -1531,7 +1494,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
struct sock *sk = sock->sk;
struct irda_sock *self;
struct sk_buff *skb;
- unsigned char *asmptr;
int err;
IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1548,7 +1510,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
return -ENOTCONN;
self = irda_sk(sk);
- IRDA_ASSERT(self != NULL, return -1;);
/*
* Check that we don't send out too big frames. This is an unreliable
@@ -1567,10 +1528,11 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
return -ENOBUFS;
skb_reserve(skb, self->max_header_size);
+ skb_reset_transport_header(skb);
IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
- asmptr = skb->h.raw = skb_put(skb, len);
- err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ skb_put(skb, len);
+ err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (err) {
kfree_skb(skb);
return err;
@@ -1603,7 +1565,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
__u8 pid = 0;
int bound = 0;
struct sk_buff *skb;
- unsigned char *asmptr;
int err;
IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1617,7 +1578,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
}
self = irda_sk(sk);
- IRDA_ASSERT(self != NULL, return -1;);
/* Check if an address was specified with sendto. Jean II */
if (msg->msg_name) {
@@ -1663,10 +1623,11 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
return -ENOBUFS;
skb_reserve(skb, self->max_header_size);
+ skb_reset_transport_header(skb);
IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
- asmptr = skb->h.raw = skb_put(skb, len);
- err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ skb_put(skb, len);
+ err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (err) {
kfree_skb(skb);
return err;
@@ -1690,8 +1651,6 @@ static int irda_shutdown(struct socket *sock, int how)
struct sock *sk = sock->sk;
struct irda_sock *self = irda_sk(sk);
- IRDA_ASSERT(self != NULL, return -1;);
-
IRDA_DEBUG(1, "%s(%p)\n", __FUNCTION__, self);
sk->sk_state = TCP_CLOSE;
@@ -1864,8 +1823,6 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
struct ias_attrib * ias_attr; /* Attribute in IAS object */
int opt;
- IRDA_ASSERT(self != NULL, return -1;);
-
IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
if (level != SOL_IRLMP)
@@ -2581,7 +2538,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = {
};
#endif /* CONFIG_IRDA_ULTRA */
-#include <linux/smp_lock.h>
SOCKOPS_WRAP(irda_stream, PF_IRDA);
SOCKOPS_WRAP(irda_seqpacket, PF_IRDA);
SOCKOPS_WRAP(irda_dgram, PF_IRDA);
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 01d7c9c7b3b4..e5e4792a0314 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -133,8 +133,8 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
* Inserting is a little bit tricky since we don't know how much
* room we will need. But this should hopefully work OK
*/
- count = irda_param_insert(self, pi, skb->tail, skb_tailroom(skb),
- &ircomm_param_info);
+ count = irda_param_insert(self, pi, skb_tail_pointer(skb),
+ skb_tailroom(skb), &ircomm_param_info);
if (count < 0) {
IRDA_WARNING("%s(), no room for parameter!\n", __FUNCTION__);
spin_unlock_irqrestore(&self->spinlock, flags);
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index e717801b38f9..7b5def1ea633 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -375,7 +375,7 @@ EXPORT_SYMBOL(alloc_irdadev);
dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
{
struct dongle_reg *reg;
- dongle_t *dongle = NULL;
+ dongle_t *dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
might_sleep();
@@ -397,19 +397,14 @@ dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
if (!reg || !try_module_get(reg->owner) ) {
IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n",
type);
- goto out;
+ kfree(dongle);
+ dongle = NULL;
+ }
+ if (dongle) {
+ /* Bind the registration info to this particular instance */
+ dongle->issue = reg;
+ dongle->dev = dev;
}
-
- /* Allocate dongle info for this instance */
- dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
- if (!dongle)
- goto out;
-
- /* Bind the registration info to this particular instance */
- dongle->issue = reg;
- dongle->dev = dev;
-
- out:
spin_unlock(&dongles->hb_spinlock);
return dongle;
}
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index fcf9d6599628..ed69773b0f8e 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -1039,7 +1039,7 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
}
/* Insert at end of sk-buffer */
- frame = skb->tail;
+ frame = skb_tail_pointer(skb);
/* Make space for data */
if (skb_tailroom(skb) < (param_len+value_len+3)) {
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 672ab3f69033..c421521c0a99 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -234,8 +234,7 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
* might have been previously set by the low level IrDA network
* device driver
*/
- skb->dev = self->dev;
- skb->protocol=eth_type_trans(skb, skb->dev); /* Remove eth header */
+ skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */
self->stats.rx_packets++;
self->stats.rx_bytes += skb->len;
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 7b6433fe1dc2..0b02073ffdf3 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -590,7 +590,7 @@ static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event,
if (!self->discovery_log) {
IRDA_WARNING("%s: discovery log is gone! "
"maybe the discovery timeout has been set"
- " to short?\n", __FUNCTION__);
+ " too short?\n", __FUNCTION__);
break;
}
hashbin_insert(self->discovery_log,
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 0b04603e9c47..3c5a68e36414 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -93,7 +93,9 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
{
/* Some common init stuff */
skb->dev = self->netdev;
- skb->h.raw = skb->nh.raw = skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
skb->protocol = htons(ETH_P_IRDA);
skb->priority = TC_PRIO_BESTEFFORT;
@@ -411,7 +413,7 @@ static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
return;
}
@@ -482,7 +484,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
char *text;
if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
return;
}
@@ -526,7 +528,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
/* Check if things are sane at this point... */
if((discovery_info == NULL) ||
!pskb_may_pull(skb, 3)) {
- IRDA_ERROR("%s: discovery frame to short!\n",
+ IRDA_ERROR("%s: discovery frame too short!\n",
__FUNCTION__);
return;
}
@@ -1171,7 +1173,7 @@ static void irlap_recv_frmr_frame(struct irlap_cb *self, struct sk_buff *skb,
IRDA_ASSERT(info != NULL, return;);
if (!pskb_may_pull(skb, 4)) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
return;
}
@@ -1260,7 +1262,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
if (!pskb_may_pull(skb, sizeof(*frame))) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
return;
}
frame = (struct test_frame *) skb->data;
@@ -1268,7 +1270,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
/* Broadcast frames must carry saddr and daddr fields */
if (info->caddr == CBROADCAST) {
if (skb->len < sizeof(struct test_frame)) {
- IRDA_DEBUG(0, "%s() test frame to short!\n",
+ IRDA_DEBUG(0, "%s() test frame too short!\n",
__FUNCTION__);
return;
}
@@ -1334,7 +1336,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
/* Check if frame is large enough for parsing */
if (!pskb_may_pull(skb, 2)) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
dev_kfree_skb(skb);
return -1;
}
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 873ae189e37a..bc2e15ce7004 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -419,7 +419,7 @@ typedef struct irnet_socket
u32 raccm; /* to please pppd - dummy) */
unsigned int flags; /* PPP flags (compression, ...) */
unsigned int rbits; /* Unused receive flags ??? */
-
+ struct work_struct disconnect_work; /* Process context disconnection */
/* ------------------------ IrTTP part ------------------------ */
/* We create a pseudo "socket" over the IrDA tranport */
unsigned long ttp_open; /* Set when IrTTP is ready */
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index c378e668af0c..a4f1439ffdd8 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -10,6 +10,27 @@
#include "irnet_irda.h" /* Private header */
+/*
+ * PPP disconnect work handler.
+ *
+ * ppp_unregister_channel() has to be called from process context, so
+ * the channel teardown is done from this work item.
+ *
+ * @work: the disconnect_work member embedded in an irnet_socket
+ */
+static void irnet_ppp_disconnect(struct work_struct *work)
+{
+	/*
+	 * container_of() only offsets the (valid) work pointer, so the
+	 * result can never be NULL and needs no check.
+	 */
+	irnet_socket *self =
+		container_of(work, irnet_socket, disconnect_work);
+
+	/*
+	 * If we were connected, cleanup & close the PPP
+	 * channel, which will kill pppd (hangup) and the rest.
+	 * This is only done while ppp_open is set and both ttp_open
+	 * and ttp_connect are clear.
+	 */
+	if (self->ppp_open && !self->ttp_open && !self->ttp_connect) {
+		ppp_unregister_channel(&self->chan);
+		self->ppp_open = 0;
+	}
+}
+
/************************* CONTROL CHANNEL *************************/
/*
* When ppp is not active, /dev/irnet act as a control channel.
@@ -499,6 +520,8 @@ irda_irnet_create(irnet_socket * self)
#endif /* DISCOVERY_NOMASK */
self->tx_flow = FLOW_START; /* Flow control from IrTTP */
+ INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect);
+
DEXIT(IRDA_SOCK_TRACE, "\n");
return(0);
}
@@ -1134,15 +1157,8 @@ irnet_disconnect_indication(void * instance,
{
if(test_open)
{
-#ifdef MISSING_PPP_API
- /* ppp_unregister_channel() wants a user context, which we
- * are guaranteed to NOT have here. What are we supposed
- * to do here ? Jean II */
- /* If we were connected, cleanup & close the PPP channel,
- * which will kill pppd (hangup) and the rest */
- ppp_unregister_channel(&self->chan);
- self->ppp_open = 0;
-#endif
+ /* ppp_unregister_channel() wants a user context. */
+ schedule_work(&self->disconnect_work);
}
else
{
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index 92662330dbcf..d058b467f9e4 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -384,6 +384,9 @@ EXPORT_SYMBOL(hashbin_new);
* for deallocating this structure if it's complex. If not the user can
* just supply kfree, which should take care of the job.
*/
+#ifdef CONFIG_LOCKDEP
+static int hashbin_lock_depth = 0;
+#endif
int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
{
irda_queue_t* queue;
@@ -395,7 +398,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
/* Synchronize */
if ( hashbin->hb_type & HB_LOCK ) {
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+ spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags,
+ hashbin_lock_depth++);
}
/*
@@ -419,6 +423,9 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
/* Release lock */
if ( hashbin->hb_type & HB_LOCK) {
spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+#ifdef CONFIG_LOCKDEP
+ hashbin_lock_depth--;
+#endif
}
/*
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index a7486b3bddcb..7069e4a58257 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -256,7 +256,7 @@ static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self)
* Copy all fragments to a new buffer
*/
while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) {
- memcpy(skb->data+n, frag->data, frag->len);
+ skb_copy_to_linear_data_offset(skb, n, frag->data, frag->len);
n += frag->len;
dev_kfree_skb(frag);
@@ -314,8 +314,8 @@ static inline void irttp_fragment_skb(struct tsap_cb *self,
skb_reserve(frag, self->max_header_size);
/* Copy data from the original skb into this fragment. */
- memcpy(skb_put(frag, self->max_seg_size), skb->data,
- self->max_seg_size);
+ skb_copy_from_linear_data(skb, skb_put(frag, self->max_seg_size),
+ self->max_seg_size);
/* Insert TTP header, with the more bit set */
frame = skb_push(frag, TTP_HEADER);
@@ -551,7 +551,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
}
if (skb->len > self->max_seg_size) {
- IRDA_DEBUG(1, "%s(), UData is to large for IrLAP!\n",
+ IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n",
__FUNCTION__);
goto err;
}
@@ -598,7 +598,7 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
* inside an IrLAP frame
*/
if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) {
- IRDA_ERROR("%s: SAR disabled, and data is to large for IrLAP!\n",
+ IRDA_ERROR("%s: SAR disabled, and data is too large for IrLAP!\n",
__FUNCTION__);
ret = -EMSGSIZE;
goto err;
@@ -1455,6 +1455,7 @@ struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance)
/* Not everything should be copied */
new->notify.instance = instance;
+ spin_lock_init(&new->lock);
init_timer(&new->todo_timer);
skb_queue_head_init(&new->rx_queue);
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
index 75a72d203b01..2627dad7cd87 100644
--- a/net/irda/parameters.c
+++ b/net/irda/parameters.c
@@ -160,7 +160,7 @@ static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
}
/* Check if buffer is long enough for insertion */
if (len < (2+p.pl)) {
- IRDA_WARNING("%s: buffer to short for insertion!\n",
+ IRDA_WARNING("%s: buffer too short for insertion!\n",
__FUNCTION__);
return -1;
}
@@ -216,7 +216,7 @@ static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
/* Check if buffer is long enough for parsing */
if (len < (2+p.pl)) {
- IRDA_WARNING("%s: buffer to short for parsing! "
+ IRDA_WARNING("%s: buffer too short for parsing! "
"Need %d bytes, but len is only %d\n",
__FUNCTION__, p.pl, len);
return -1;
@@ -304,7 +304,7 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
/* Check if buffer is long enough for parsing */
if (len < (2+p.pl)) {
- IRDA_WARNING("%s: buffer to short for parsing! "
+ IRDA_WARNING("%s: buffer too short for parsing! "
"Need %d bytes, but len is only %d\n",
__FUNCTION__, p.pl, len);
return -1;
@@ -343,7 +343,7 @@ static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
/* Check if buffer is long enough for parsing */
if (len < (2+p.pl)) {
- IRDA_WARNING("%s: buffer to short for parsing! "
+ IRDA_WARNING("%s: buffer too short for parsing! "
"Need %d bytes, but len is only %d\n",
__FUNCTION__, p.pl, len);
return -1;
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 349012c926b7..aeb18cf1dcae 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -469,49 +469,49 @@ int irlap_insert_qos_negotiation_params(struct irlap_cb *self,
int ret;
/* Insert data rate */
- ret = irda_param_insert(self, PI_BAUD_RATE, skb->tail,
+ ret = irda_param_insert(self, PI_BAUD_RATE, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert max turnaround time */
- ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb->tail,
+ ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert data size */
- ret = irda_param_insert(self, PI_DATA_SIZE, skb->tail,
+ ret = irda_param_insert(self, PI_DATA_SIZE, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert window size */
- ret = irda_param_insert(self, PI_WINDOW_SIZE, skb->tail,
+ ret = irda_param_insert(self, PI_WINDOW_SIZE, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert additional BOFs */
- ret = irda_param_insert(self, PI_ADD_BOFS, skb->tail,
+ ret = irda_param_insert(self, PI_ADD_BOFS, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert minimum turnaround time */
- ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb->tail,
+ ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert link disconnect/threshold time */
- ret = irda_param_insert(self, PI_LINK_DISC, skb->tail,
+ ret = irda_param_insert(self, PI_LINK_DISC, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
index 5abfb71aae8d..a7a7f191f1a8 100644
--- a/net/irda/wrapper.c
+++ b/net/irda/wrapper.c
@@ -239,7 +239,8 @@ async_bump(struct net_device *dev,
if(docopy) {
/* Copy data without CRC (lenght already checked) */
- memcpy(newskb->data, rx_buff->data, rx_buff->len - 2);
+ skb_copy_to_linear_data(newskb, rx_buff->data,
+ rx_buff->len - 2);
/* Deliver this skb */
dataskb = newskb;
} else {
@@ -256,7 +257,7 @@ async_bump(struct net_device *dev,
/* Feed it to IrLAP layer */
dataskb->dev = dev;
- dataskb->mac.raw = dataskb->data;
+ skb_reset_mac_header(dataskb);
dataskb->protocol = htons(ETH_P_IRDA);
netif_rx(dataskb);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index acc94214bde6..d9e9ddb8eac5 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -45,7 +45,8 @@ static struct proto iucv_proto = {
static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]);
-static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]);
+static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8],
+ u8 ipuser[16]);
static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]);
static struct iucv_sock_list iucv_sk_list = {
@@ -147,11 +148,12 @@ static void iucv_sock_close(struct sock *sk)
unsigned char user_data[16];
struct iucv_sock *iucv = iucv_sk(sk);
int err;
+ unsigned long timeo;
iucv_sock_clear_timer(sk);
lock_sock(sk);
- switch(sk->sk_state) {
+ switch (sk->sk_state) {
case IUCV_LISTEN:
iucv_sock_cleanup_listen(sk);
break;
@@ -159,6 +161,21 @@ static void iucv_sock_close(struct sock *sk)
case IUCV_CONNECTED:
case IUCV_DISCONN:
err = 0;
+
+ sk->sk_state = IUCV_CLOSING;
+ sk->sk_state_change(sk);
+
+ if (!skb_queue_empty(&iucv->send_skb_q)) {
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+ timeo = sk->sk_lingertime;
+ else
+ timeo = IUCV_DISCONN_TIMEOUT;
+ err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo);
+ }
+
+ sk->sk_state = IUCV_CLOSED;
+ sk->sk_state_change(sk);
+
if (iucv->path) {
low_nmcpy(user_data, iucv->src_name);
high_nmcpy(user_data, iucv->dst_name);
@@ -168,12 +185,11 @@ static void iucv_sock_close(struct sock *sk)
iucv->path = NULL;
}
- sk->sk_state = IUCV_CLOSED;
- sk->sk_state_change(sk);
sk->sk_err = ECONNRESET;
sk->sk_state_change(sk);
skb_queue_purge(&iucv->send_skb_q);
+ skb_queue_purge(&iucv->backlog_skb_q);
sock_set_flag(sk, SOCK_ZAPPED);
break;
@@ -181,7 +197,7 @@ static void iucv_sock_close(struct sock *sk)
default:
sock_set_flag(sk, SOCK_ZAPPED);
break;
- };
+ }
release_sock(sk);
iucv_sock_kill(sk);
@@ -204,6 +220,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
sock_init_data(sock, sk);
INIT_LIST_HEAD(&iucv_sk(sk)->accept_q);
skb_queue_head_init(&iucv_sk(sk)->send_skb_q);
+ skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
iucv_sk(sk)->send_tag = 0;
sk->sk_destruct = iucv_sock_destruct;
@@ -276,7 +293,7 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
struct iucv_sock *isk, *n;
struct sock *sk;
- list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){
+ list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q) {
sk = (struct sock *) isk;
lock_sock(sk);
@@ -510,7 +527,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
long timeo;
int err = 0;
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_state != IUCV_LISTEN) {
err = -EBADFD;
@@ -521,7 +538,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
/* Wait for an incoming connection */
add_wait_queue_exclusive(sk->sk_sleep, &wait);
- while (!(nsk = iucv_accept_dequeue(sk, newsock))){
+ while (!(nsk = iucv_accept_dequeue(sk, newsock))) {
set_current_state(TASK_INTERRUPTIBLE);
if (!timeo) {
err = -EAGAIN;
@@ -530,7 +547,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
release_sock(sk);
timeo = schedule_timeout(timeo);
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_state != IUCV_LISTEN) {
err = -EBADFD;
@@ -602,13 +619,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- if (sk->sk_state == IUCV_CONNECTED){
- if(!(skb = sock_alloc_send_skb(sk, len,
- msg->msg_flags & MSG_DONTWAIT,
- &err)))
- return err;
+ if (sk->sk_state == IUCV_CONNECTED) {
+ if (!(skb = sock_alloc_send_skb(sk, len,
+ msg->msg_flags & MSG_DONTWAIT,
+ &err)))
+ goto out;
- if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)){
+ if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
err = -EFAULT;
goto fail;
}
@@ -647,10 +664,16 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
{
int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
+ struct iucv_sock *iucv = iucv_sk(sk);
int target, copied = 0;
- struct sk_buff *skb;
+ struct sk_buff *skb, *rskb, *cskb;
int err = 0;
+ if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) &&
+ skb_queue_empty(&iucv->backlog_skb_q) &&
+ skb_queue_empty(&sk->sk_receive_queue))
+ return 0;
+
if (flags & (MSG_OOB))
return -EOPNOTSUPP;
@@ -665,10 +688,12 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
copied = min_t(unsigned int, skb->len, len);
- if (memcpy_toiovec(msg->msg_iov, skb->data, copied)) {
+ cskb = skb;
+ if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) {
skb_queue_head(&sk->sk_receive_queue, skb);
if (copied == 0)
return -EFAULT;
+ goto done;
}
len -= copied;
@@ -683,6 +708,18 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
}
kfree_skb(skb);
+
+ /* Queue backlog skbs */
+ rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
+ while (rskb) {
+ if (sock_queue_rcv_skb(sk, rskb)) {
+ skb_queue_head(&iucv_sk(sk)->backlog_skb_q,
+ rskb);
+ break;
+ } else {
+ rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
+ }
+ }
} else
skb_queue_head(&sk->sk_receive_queue, skb);
@@ -695,7 +732,7 @@ static inline unsigned int iucv_accept_poll(struct sock *parent)
struct iucv_sock *isk, *n;
struct sock *sk;
- list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){
+ list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q) {
sk = (struct sock *) isk;
if (sk->sk_state == IUCV_CONNECTED)
@@ -726,12 +763,15 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
mask |= POLLHUP;
if (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sk->sk_shutdown & RCV_SHUTDOWN))
+ (sk->sk_shutdown & RCV_SHUTDOWN))
mask |= POLLIN | POLLRDNORM;
if (sk->sk_state == IUCV_CLOSED)
mask |= POLLHUP;
+ if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED)
+ mask |= POLLIN;
+
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
@@ -754,7 +794,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
return -EINVAL;
lock_sock(sk);
- switch(sk->sk_state) {
+ switch (sk->sk_state) {
case IUCV_CLOSED:
err = -ENOTCONN;
goto fail;
@@ -770,7 +810,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0,
(void *) prmmsg, 8);
if (err) {
- switch(err) {
+ switch (err) {
case 1:
err = -ENOTCONN;
break;
@@ -817,13 +857,6 @@ static int iucv_sock_release(struct socket *sock)
iucv_sk(sk)->path = NULL;
}
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime){
- lock_sock(sk);
- err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0,
- sk->sk_lingertime);
- release_sock(sk);
- }
-
sock_orphan(sk);
iucv_sock_kill(sk);
return err;
@@ -880,7 +913,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
/* Create the new socket */
nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC);
- if (!nsk){
+ if (!nsk) {
err = iucv_path_sever(path, user_data);
goto fail;
}
@@ -903,7 +936,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
path->msglim = IUCV_QUEUELEN_DEFAULT;
err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk);
- if (err){
+ if (err) {
err = iucv_path_sever(path, user_data);
goto fail;
}
@@ -927,18 +960,53 @@ static void iucv_callback_connack(struct iucv_path *path, u8 ipuser[16])
sk->sk_state_change(sk);
}
+/**
+ * iucv_fragment_skb
+ * @sk: socket whose sk_rcvbuf bounds the size of each fragment
+ * @skb: source buffer holding the received message data
+ * @len: number of bytes of @skb->data to split up
+ * @fragmented_skb_q: queue the newly allocated fragments are appended to
+ *
+ * Copies @len bytes from @skb into freshly allocated skbs of at most
+ * sk_rcvbuf / 4 bytes each and appends them to @fragmented_skb_q.
+ * @skb itself is neither modified nor freed here.
+ *
+ * Returns 0 on success. If a fragment cannot be allocated, the queue is
+ * purged and -ENOMEM is returned.
+ */
+static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len,
+			     struct sk_buff_head *fragmented_skb_q)
+{
+	int dataleft, size, copied = 0;
+	struct sk_buff *nskb;
+
+	dataleft = len;
+	while (dataleft) {
+		/* Cap every fragment at a quarter of the receive buffer. */
+		if (dataleft >= sk->sk_rcvbuf / 4)
+			size = sk->sk_rcvbuf / 4;
+		else
+			size = dataleft;
+
+		nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA);
+		if (!nskb) {
+			/*
+			 * The caller bails out on error without draining
+			 * the queue, so free what has been queued so far
+			 * instead of leaking it.
+			 */
+			skb_queue_purge(fragmented_skb_q);
+			return -ENOMEM;
+		}
+
+		memcpy(nskb->data, skb->data + copied, size);
+		copied += size;
+		dataleft -= size;
+
+		skb_reset_transport_header(nskb);
+		skb_reset_network_header(nskb);
+		nskb->len = size;
+
+		skb_queue_tail(fragmented_skb_q, nskb);
+	}
+
+	return 0;
+}
+
static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
{
struct sock *sk = path->private;
- struct sk_buff *skb;
+ struct iucv_sock *iucv = iucv_sk(sk);
+ struct sk_buff *skb, *fskb;
+ struct sk_buff_head fragmented_skb_q;
int rc;
+ skb_queue_head_init(&fragmented_skb_q);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
return;
skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA);
if (!skb) {
- iucv_message_reject(path, msg);
+ iucv_path_sever(path, NULL);
return;
}
@@ -952,14 +1020,39 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
kfree_skb(skb);
return;
}
+ if (skb->truesize >= sk->sk_rcvbuf / 4) {
+ rc = iucv_fragment_skb(sk, skb, msg->length,
+ &fragmented_skb_q);
+ kfree_skb(skb);
+ skb = NULL;
+ if (rc) {
+ iucv_path_sever(path, NULL);
+ return;
+ }
+ } else {
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
+ skb->len = msg->length;
+ }
+ }
+ /* Queue the fragmented skb */
+ fskb = skb_dequeue(&fragmented_skb_q);
+ while (fskb) {
+ if (!skb_queue_empty(&iucv->backlog_skb_q))
+ skb_queue_tail(&iucv->backlog_skb_q, fskb);
+ else if (sock_queue_rcv_skb(sk, fskb))
+ skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, fskb);
+ fskb = skb_dequeue(&fragmented_skb_q);
+ }
- skb->h.raw = skb->data;
- skb->nh.raw = skb->data;
- skb->len = msg->length;
+ /* Queue the original skb if it exists (was not fragmented) */
+ if (skb) {
+ if (!skb_queue_empty(&iucv->backlog_skb_q))
+ skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
+ else if (sock_queue_rcv_skb(sk, skb))
+ skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
}
- if (sock_queue_rcv_skb(sk, skb))
- kfree_skb(skb);
}
static void iucv_callback_txdone(struct iucv_path *path,
@@ -971,17 +1064,27 @@ static void iucv_callback_txdone(struct iucv_path *path,
struct sk_buff *list_skb = list->next;
unsigned long flags;
- spin_lock_irqsave(&list->lock, flags);
+ if (list_skb) {
+ spin_lock_irqsave(&list->lock, flags);
+
+ do {
+ this = list_skb;
+ list_skb = list_skb->next;
+ } while (memcmp(&msg->tag, this->cb, 4) && list_skb);
+
+ spin_unlock_irqrestore(&list->lock, flags);
- do {
- this = list_skb;
- list_skb = list_skb->next;
- } while (memcmp(&msg->tag, this->cb, 4));
+ skb_unlink(this, &iucv_sk(sk)->send_skb_q);
+ kfree_skb(this);
+ }
- spin_unlock_irqrestore(&list->lock, flags);
+ if (sk->sk_state == IUCV_CLOSING) {
+ if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) {
+ sk->sk_state = IUCV_CLOSED;
+ sk->sk_state_change(sk);
+ }
+ }
- skb_unlink(this, &iucv_sk(sk)->send_skb_q);
- kfree_skb(this);
}
static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16])
@@ -1022,7 +1125,7 @@ static struct net_proto_family iucv_sock_family_ops = {
.create = iucv_sock_create,
};
-static int afiucv_init(void)
+static int __init afiucv_init(void)
{
int err;
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 1b10d576f222..fb3faf72e850 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -32,7 +32,6 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
-
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -69,7 +68,7 @@
#define IUCV_IPNORPY 0x10
#define IUCV_IPALL 0x80
-static int iucv_bus_match (struct device *dev, struct device_driver *drv)
+static int iucv_bus_match(struct device *dev, struct device_driver *drv)
{
return 0;
}
@@ -78,8 +77,11 @@ struct bus_type iucv_bus = {
.name = "iucv",
.match = iucv_bus_match,
};
+EXPORT_SYMBOL(iucv_bus);
struct device *iucv_root;
+EXPORT_SYMBOL(iucv_root);
+
static int iucv_available;
/* General IUCV interrupt structure */
@@ -90,20 +92,43 @@ struct iucv_irq_data {
u32 res2[8];
};
-struct iucv_work {
+struct iucv_irq_list {
struct list_head list;
struct iucv_irq_data data;
};
-static LIST_HEAD(iucv_work_queue);
-static DEFINE_SPINLOCK(iucv_work_lock);
-
static struct iucv_irq_data *iucv_irq_data;
static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE;
static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE;
-static void iucv_tasklet_handler(unsigned long);
-static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_handler,0);
+/*
+ * Queue of interrupt buffers for delivery via the tasklet
+ * (fast but can't call smp_call_function).
+ */
+static LIST_HEAD(iucv_task_queue);
+
+/*
+ * The tasklet for fast delivery of iucv interrupts.
+ */
+static void iucv_tasklet_fn(unsigned long);
+static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_fn,0);
+
+/*
+ * Queue of interrupt buffers for delivery via a work queue
+ * (slower but can call smp_call_function).
+ */
+static LIST_HEAD(iucv_work_queue);
+
+/*
+ * The work element to deliver path pending interrupts.
+ */
+static void iucv_work_fn(struct work_struct *work);
+static DECLARE_WORK(iucv_work, iucv_work_fn);
+
+/*
+ * Spinlock protecting task and work queue.
+ */
+static DEFINE_SPINLOCK(iucv_queue_lock);
enum iucv_command_codes {
IUCV_QUERY = 0,
@@ -147,10 +172,10 @@ static unsigned long iucv_max_pathid;
static DEFINE_SPINLOCK(iucv_table_lock);
/*
- * iucv_tasklet_cpu: contains the number of the cpu executing the tasklet.
- * Needed for iucv_path_sever called from tasklet.
+ * iucv_active_cpu: contains the number of the cpu executing the tasklet
+ * or the work handler. Needed for iucv_path_sever called from tasklet.
*/
-static int iucv_tasklet_cpu = -1;
+static int iucv_active_cpu = -1;
/*
* Mutex and wait queue for iucv_register/iucv_unregister.
@@ -382,7 +407,7 @@ static void iucv_declare_cpu(void *data)
rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm);
if (rc) {
char *err = "Unknown";
- switch(rc) {
+ switch (rc) {
case 0x03:
err = "Directory error";
break;
@@ -449,17 +474,19 @@ static void iucv_setmask_mp(void)
{
int cpu;
+ preempt_disable();
for_each_online_cpu(cpu)
/* Enable all cpus with a declared buffer. */
if (cpu_isset(cpu, iucv_buffer_cpumask) &&
!cpu_isset(cpu, iucv_irq_cpumask))
smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, cpu);
+ preempt_enable();
}
/**
* iucv_setmask_up
*
- * Allow iucv interrupts on a single cpus.
+ * Allow iucv interrupts on a single cpu.
*/
static void iucv_setmask_up(void)
{
@@ -493,8 +520,10 @@ static int iucv_enable(void)
goto out;
/* Declare per cpu buffers. */
rc = -EIO;
+ preempt_disable();
for_each_online_cpu(cpu)
smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu);
+ preempt_enable();
if (cpus_empty(iucv_buffer_cpumask))
/* No cpu could declare an iucv buffer. */
goto out_path;
@@ -519,7 +548,6 @@ static void iucv_disable(void)
kfree(iucv_path_table);
}
-#ifdef CONFIG_HOTPLUG_CPU
static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -562,10 +590,9 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block iucv_cpu_notifier = {
+static struct notifier_block __cpuinitdata iucv_cpu_notifier = {
.notifier_call = iucv_cpu_notify,
};
-#endif
/**
* iucv_sever_pathid
@@ -586,48 +613,49 @@ static int iucv_sever_pathid(u16 pathid, u8 userdata[16])
return iucv_call_b2f0(IUCV_SEVER, parm);
}
+#ifdef CONFIG_SMP
/**
- * __iucv_cleanup_pathid
+ * __iucv_cleanup_queue
* @dummy: unused dummy argument
*
* Nop function called via smp_call_function to force work items from
* pending external iucv interrupts to the work queue.
*/
-static void __iucv_cleanup_pathid(void *dummy)
+static void __iucv_cleanup_queue(void *dummy)
{
}
+#endif
/**
- * iucv_cleanup_pathid
- * @pathid: 16 bit pathid
+ * iucv_cleanup_queue
*
* Function called after a path has been severed to find all remaining
* work items for the now stale pathid. The caller needs to hold the
* iucv_table_lock.
*/
-static void iucv_cleanup_pathid(u16 pathid)
+static void iucv_cleanup_queue(void)
{
- struct iucv_work *p, *n;
+ struct iucv_irq_list *p, *n;
/*
- * Path is severed, the pathid can be reused immediatly on
- * a iucv connect or a connection pending interrupt.
- * iucv_path_connect and connection pending interrupt will
- * wait until the iucv_table_lock is released before the
- * recycled pathid enters the system.
- * Force remaining interrupts to the work queue, then
- * scan the work queue for items of this path.
+ * When a path is severed, the pathid can be reused immediately
+ * on an iucv connect or a connection pending interrupt. Remove
+ * all entries from the task queue that refer to a stale pathid
+ * (iucv_path_table[ix] == NULL). Only then do the iucv connect
+ * or deliver the connection pending interrupt. To get all the
+ * pending interrupts force them to the work queue by calling
+ * an empty function on all cpus.
*/
- smp_call_function(__iucv_cleanup_pathid, NULL, 0, 1);
- spin_lock_irq(&iucv_work_lock);
- list_for_each_entry_safe(p, n, &iucv_work_queue, list) {
- /* Remove work items for pathid except connection pending */
- if (p->data.ippathid == pathid && p->data.iptype != 0x01) {
+ smp_call_function(__iucv_cleanup_queue, NULL, 0, 1);
+ spin_lock_irq(&iucv_queue_lock);
+ list_for_each_entry_safe(p, n, &iucv_task_queue, list) {
+ /* Remove stale work items from the task queue. */
+ if (iucv_path_table[p->data.ippathid] == NULL) {
list_del(&p->list);
kfree(p);
}
}
- spin_unlock_irq(&iucv_work_lock);
+ spin_unlock_irq(&iucv_queue_lock);
}
/**
@@ -665,6 +693,7 @@ out_mutex:
mutex_unlock(&iucv_register_mutex);
return rc;
}
+EXPORT_SYMBOL(iucv_register);
/**
* iucv_unregister
@@ -686,7 +715,6 @@ void iucv_unregister(struct iucv_handler *handler, int smp)
iucv_sever_pathid(p->pathid, NULL);
iucv_path_table[p->pathid] = NULL;
list_del(&p->list);
- iucv_cleanup_pathid(p->pathid);
iucv_path_free(p);
}
spin_unlock_bh(&iucv_table_lock);
@@ -698,6 +726,7 @@ void iucv_unregister(struct iucv_handler *handler, int smp)
iucv_setmask_mp();
mutex_unlock(&iucv_register_mutex);
}
+EXPORT_SYMBOL(iucv_unregister);
/**
* iucv_path_accept
@@ -736,6 +765,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_path_accept);
/**
* iucv_path_connect
@@ -759,9 +789,9 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
union iucv_param *parm;
int rc;
- preempt_disable();
- if (iucv_tasklet_cpu != smp_processor_id())
- spin_lock_bh(&iucv_table_lock);
+ BUG_ON(in_atomic());
+ spin_lock_bh(&iucv_table_lock);
+ iucv_cleanup_queue();
parm = percpu_ptr(iucv_param, smp_processor_id());
memset(parm, 0, sizeof(union iucv_param));
parm->ctrl.ipmsglim = path->msglim;
@@ -796,11 +826,10 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
rc = -EIO;
}
}
- if (iucv_tasklet_cpu != smp_processor_id())
- spin_unlock_bh(&iucv_table_lock);
- preempt_enable();
+ spin_unlock_bh(&iucv_table_lock);
return rc;
}
+EXPORT_SYMBOL(iucv_path_connect);
/**
* iucv_path_quiesce:
@@ -827,6 +856,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16])
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_path_quiesce);
/**
* iucv_path_resume:
@@ -867,21 +897,20 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16])
{
int rc;
-
preempt_disable();
- if (iucv_tasklet_cpu != smp_processor_id())
+ if (iucv_active_cpu != smp_processor_id())
spin_lock_bh(&iucv_table_lock);
rc = iucv_sever_pathid(path->pathid, userdata);
if (!rc) {
iucv_path_table[path->pathid] = NULL;
list_del_init(&path->list);
- iucv_cleanup_pathid(path->pathid);
}
- if (iucv_tasklet_cpu != smp_processor_id())
+ if (iucv_active_cpu != smp_processor_id())
spin_unlock_bh(&iucv_table_lock);
preempt_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_path_sever);
/**
* iucv_message_purge
@@ -914,6 +943,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_purge);
/**
* iucv_message_receive
@@ -984,6 +1014,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_receive);
/**
* iucv_message_reject
@@ -1012,6 +1043,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg)
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_reject);
/**
* iucv_message_reply
@@ -1055,6 +1087,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_reply);
/**
* iucv_message_send
@@ -1103,6 +1136,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_send);
/**
* iucv_message_send2way
@@ -1159,6 +1193,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_send2way);
/**
* iucv_path_pending
@@ -1246,8 +1281,7 @@ static void iucv_path_complete(struct iucv_irq_data *data)
struct iucv_path_complete *ipc = (void *) data;
struct iucv_path *path = iucv_path_table[ipc->ippathid];
- BUG_ON(!path || !path->handler);
- if (path->handler->path_complete)
+ if (path && path->handler && path->handler->path_complete)
path->handler->path_complete(path, ipc->ipuser);
}
@@ -1275,14 +1309,14 @@ static void iucv_path_severed(struct iucv_irq_data *data)
struct iucv_path_severed *ips = (void *) data;
struct iucv_path *path = iucv_path_table[ips->ippathid];
- BUG_ON(!path || !path->handler);
+ if (!path || !path->handler) /* Already severed */
+ return;
if (path->handler->path_severed)
path->handler->path_severed(path, ips->ipuser);
else {
iucv_sever_pathid(path->pathid, NULL);
iucv_path_table[path->pathid] = NULL;
list_del_init(&path->list);
- iucv_cleanup_pathid(path->pathid);
iucv_path_free(path);
}
}
@@ -1311,8 +1345,7 @@ static void iucv_path_quiesced(struct iucv_irq_data *data)
struct iucv_path_quiesced *ipq = (void *) data;
struct iucv_path *path = iucv_path_table[ipq->ippathid];
- BUG_ON(!path || !path->handler);
- if (path->handler->path_quiesced)
+ if (path && path->handler && path->handler->path_quiesced)
path->handler->path_quiesced(path, ipq->ipuser);
}
@@ -1340,8 +1373,7 @@ static void iucv_path_resumed(struct iucv_irq_data *data)
struct iucv_path_resumed *ipr = (void *) data;
struct iucv_path *path = iucv_path_table[ipr->ippathid];
- BUG_ON(!path || !path->handler);
- if (path->handler->path_resumed)
+ if (path && path->handler && path->handler->path_resumed)
path->handler->path_resumed(path, ipr->ipuser);
}
@@ -1373,8 +1405,7 @@ static void iucv_message_complete(struct iucv_irq_data *data)
struct iucv_path *path = iucv_path_table[imc->ippathid];
struct iucv_message msg;
- BUG_ON(!path || !path->handler);
- if (path->handler->message_complete) {
+ if (path && path->handler && path->handler->message_complete) {
msg.flags = imc->ipflags1;
msg.id = imc->ipmsgid;
msg.audit = imc->ipaudit;
@@ -1419,8 +1450,7 @@ static void iucv_message_pending(struct iucv_irq_data *data)
struct iucv_path *path = iucv_path_table[imp->ippathid];
struct iucv_message msg;
- BUG_ON(!path || !path->handler);
- if (path->handler->message_pending) {
+ if (path && path->handler && path->handler->message_pending) {
msg.flags = imp->ipflags1;
msg.id = imp->ipmsgid;
msg.class = imp->iptrgcls;
@@ -1435,17 +1465,16 @@ static void iucv_message_pending(struct iucv_irq_data *data)
}
/**
- * iucv_tasklet_handler:
+ * iucv_tasklet_fn:
*
* This tasklet loops over the queue of irq buffers created by
* iucv_external_interrupt, calls the appropriate action handler
* and then frees the buffer.
*/
-static void iucv_tasklet_handler(unsigned long ignored)
+static void iucv_tasklet_fn(unsigned long ignored)
{
typedef void iucv_irq_fn(struct iucv_irq_data *);
static iucv_irq_fn *irq_fn[] = {
- [0x01] = iucv_path_pending,
[0x02] = iucv_path_complete,
[0x03] = iucv_path_severed,
[0x04] = iucv_path_quiesced,
@@ -1455,38 +1484,70 @@ static void iucv_tasklet_handler(unsigned long ignored)
[0x08] = iucv_message_pending,
[0x09] = iucv_message_pending,
};
- struct iucv_work *p;
+ struct list_head task_queue = LIST_HEAD_INIT(task_queue);
+ struct iucv_irq_list *p, *n;
/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
spin_lock(&iucv_table_lock);
- iucv_tasklet_cpu = smp_processor_id();
+ iucv_active_cpu = smp_processor_id();
- spin_lock_irq(&iucv_work_lock);
- while (!list_empty(&iucv_work_queue)) {
- p = list_entry(iucv_work_queue.next, struct iucv_work, list);
+ spin_lock_irq(&iucv_queue_lock);
+ list_splice_init(&iucv_task_queue, &task_queue);
+ spin_unlock_irq(&iucv_queue_lock);
+
+ list_for_each_entry_safe(p, n, &task_queue, list) {
list_del_init(&p->list);
- spin_unlock_irq(&iucv_work_lock);
irq_fn[p->data.iptype](&p->data);
kfree(p);
- spin_lock_irq(&iucv_work_lock);
}
- spin_unlock_irq(&iucv_work_lock);
- iucv_tasklet_cpu = -1;
+ iucv_active_cpu = -1;
spin_unlock(&iucv_table_lock);
}
/**
+ * iucv_work_fn:
+ *
+ * This work function loops over the queue of path pending irq blocks
+ * created by iucv_external_interrupt, calls the appropriate action
+ * handler and then frees the buffer.
+ */
+static void iucv_work_fn(struct work_struct *work)
+{
+	/*
+	 * Private list head: the pending entries are moved here so that
+	 * iucv_queue_lock is only held for the splice, not while the
+	 * handlers run.
+	 */
+	struct list_head work_queue = LIST_HEAD_INIT(work_queue);
+	struct iucv_irq_list *p, *n;
+
+	/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
+	spin_lock_bh(&iucv_table_lock);
+	iucv_active_cpu = smp_processor_id();
+
+	spin_lock_irq(&iucv_queue_lock);
+	list_splice_init(&iucv_work_queue, &work_queue);
+	spin_unlock_irq(&iucv_queue_lock);
+
+	iucv_cleanup_queue();
+	/* Each entry is unlinked, handed to the handler and then freed. */
+	list_for_each_entry_safe(p, n, &work_queue, list) {
+		list_del_init(&p->list);
+		iucv_path_pending(&p->data);
+		kfree(p);
+	}
+
+	iucv_active_cpu = -1;
+	spin_unlock_bh(&iucv_table_lock);
+}
+
+/**
* iucv_external_interrupt
* @code: irq code
*
* Handles external interrupts coming in from CP.
- * Places the interrupt buffer on a queue and schedules iucv_tasklet_handler().
+ * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn().
*/
static void iucv_external_interrupt(u16 code)
{
struct iucv_irq_data *p;
- struct iucv_work *work;
+ struct iucv_irq_list *work;
p = percpu_ptr(iucv_irq_data, smp_processor_id());
if (p->ippathid >= iucv_max_pathid) {
@@ -1500,16 +1561,23 @@ static void iucv_external_interrupt(u16 code)
printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n");
return;
}
- work = kmalloc(sizeof(struct iucv_work), GFP_ATOMIC);
+ work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC);
if (!work) {
printk(KERN_WARNING "iucv_external_interrupt: out of memory\n");
return;
}
memcpy(&work->data, p, sizeof(work->data));
- spin_lock(&iucv_work_lock);
- list_add_tail(&work->list, &iucv_work_queue);
- spin_unlock(&iucv_work_lock);
- tasklet_schedule(&iucv_tasklet);
+ spin_lock(&iucv_queue_lock);
+ if (p->iptype == 0x01) {
+ /* Path pending interrupt. */
+ list_add_tail(&work->list, &iucv_work_queue);
+ schedule_work(&iucv_work);
+ } else {
+ /* The other interrupts. */
+ list_add_tail(&work->list, &iucv_task_queue);
+ tasklet_schedule(&iucv_tasklet);
+ }
+ spin_unlock(&iucv_queue_lock);
}
/**
@@ -1517,7 +1585,7 @@ static void iucv_external_interrupt(u16 code)
*
* Allocates and initializes various data structures.
*/
-static int iucv_init(void)
+static int __init iucv_init(void)
{
int rc;
@@ -1528,7 +1596,7 @@ static int iucv_init(void)
rc = iucv_query_maxconn();
if (rc)
goto out;
- rc = register_external_interrupt (0x4000, iucv_external_interrupt);
+ rc = register_external_interrupt(0x4000, iucv_external_interrupt);
if (rc)
goto out;
rc = bus_register(&iucv_bus);
@@ -1539,7 +1607,7 @@ static int iucv_init(void)
rc = PTR_ERR(iucv_root);
goto out_bus;
}
- /* Note: GFP_DMA used used to get memory below 2G */
+ /* Note: GFP_DMA used to get memory below 2G */
iucv_irq_data = percpu_alloc(sizeof(struct iucv_irq_data),
GFP_KERNEL|GFP_DMA);
if (!iucv_irq_data) {
@@ -1577,14 +1645,16 @@ out:
*
* Frees everything allocated from iucv_init.
*/
-static void iucv_exit(void)
+static void __exit iucv_exit(void)
{
- struct iucv_work *p, *n;
+ struct iucv_irq_list *p, *n;
- spin_lock_irq(&iucv_work_lock);
+ spin_lock_irq(&iucv_queue_lock);
+ list_for_each_entry_safe(p, n, &iucv_task_queue, list)
+ kfree(p);
list_for_each_entry_safe(p, n, &iucv_work_queue, list)
kfree(p);
- spin_unlock_irq(&iucv_work_lock);
+ spin_unlock_irq(&iucv_queue_lock);
unregister_hotcpu_notifier(&iucv_cpu_notifier);
percpu_free(iucv_param);
percpu_free(iucv_irq_data);
@@ -1596,24 +1666,6 @@ static void iucv_exit(void)
subsys_initcall(iucv_init);
module_exit(iucv_exit);
-/**
- * Export all public stuff
- */
-EXPORT_SYMBOL (iucv_bus);
-EXPORT_SYMBOL (iucv_root);
-EXPORT_SYMBOL (iucv_register);
-EXPORT_SYMBOL (iucv_unregister);
-EXPORT_SYMBOL (iucv_path_accept);
-EXPORT_SYMBOL (iucv_path_connect);
-EXPORT_SYMBOL (iucv_path_quiesce);
-EXPORT_SYMBOL (iucv_path_sever);
-EXPORT_SYMBOL (iucv_message_purge);
-EXPORT_SYMBOL (iucv_message_receive);
-EXPORT_SYMBOL (iucv_message_reject);
-EXPORT_SYMBOL (iucv_message_reply);
-EXPORT_SYMBOL (iucv_message_send);
-EXPORT_SYMBOL (iucv_message_send2way);
-
MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert (felfert@millenux.com)");
MODULE_DESCRIPTION("Linux for S/390 IUCV lowlevel driver");
MODULE_LICENSE("GPL");
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a4e7e2db0ff3..a99444142dc7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -379,7 +379,7 @@ static int verify_address_len(void *p)
*/
return -EINVAL;
break;
- };
+ }
return 0;
}
@@ -630,6 +630,35 @@ pfkey_sockaddr_size(sa_family_t family)
/* NOTREACHED */
}
+/*
+ * Translate an XFRM_MODE_* value into the matching IPSEC_MODE_* value.
+ * Returns -1 for any xfrm mode that has no mapping here.
+ */
+static inline int pfkey_mode_from_xfrm(int mode)
+{
+	if (mode == XFRM_MODE_TRANSPORT)
+		return IPSEC_MODE_TRANSPORT;
+	if (mode == XFRM_MODE_TUNNEL)
+		return IPSEC_MODE_TUNNEL;
+	if (mode == XFRM_MODE_BEET)
+		return IPSEC_MODE_BEET;
+
+	return -1;
+}
+
+/*
+ * Translate an IPSEC_MODE_* value into the matching XFRM_MODE_* value.
+ * IPSEC_MODE_ANY is treated like transport mode (XXX).
+ * Returns -1 for any value that has no mapping here.
+ */
+static inline int pfkey_mode_to_xfrm(int mode)
+{
+	if (mode == IPSEC_MODE_ANY || mode == IPSEC_MODE_TRANSPORT)
+		return XFRM_MODE_TRANSPORT;
+	if (mode == IPSEC_MODE_TUNNEL)
+		return XFRM_MODE_TUNNEL;
+	if (mode == IPSEC_MODE_BEET)
+		return XFRM_MODE_BEET;
+
+	return -1;
+}
+
static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, int hsc)
{
struct sk_buff *skb;
@@ -651,6 +680,7 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
int encrypt_key_size = 0;
int sockaddr_size;
struct xfrm_encap_tmpl *natt = NULL;
+ int mode;
/* address family check */
sockaddr_size = pfkey_sockaddr_size(x->props.family);
@@ -928,7 +958,11 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
sa2 = (struct sadb_x_sa2 *) skb_put(skb, sizeof(struct sadb_x_sa2));
sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t);
sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2;
- sa2->sadb_x_sa2_mode = x->props.mode + 1;
+ if ((mode = pfkey_mode_from_xfrm(x->props.mode)) < 0) {
+ kfree_skb(skb);
+ return ERR_PTR(-EINVAL);
+ }
+ sa2->sadb_x_sa2_mode = mode;
sa2->sadb_x_sa2_reserved1 = 0;
sa2->sadb_x_sa2_reserved2 = 0;
sa2->sadb_x_sa2_sequence = 0;
@@ -1155,9 +1189,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
if (ext_hdrs[SADB_X_EXT_SA2-1]) {
struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1];
- x->props.mode = sa2->sadb_x_sa2_mode;
- if (x->props.mode)
- x->props.mode--;
+ int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
+ if (mode < 0) {
+ err = -EINVAL;
+ goto out;
+ }
+ x->props.mode = mode;
x->props.reqid = sa2->sadb_x_sa2_reqid;
}
@@ -1218,7 +1255,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
struct sadb_address *saddr, *daddr;
struct sadb_msg *out_hdr;
struct xfrm_state *x = NULL;
- u8 mode;
+ int mode;
u32 reqid;
u8 proto;
unsigned short family;
@@ -1233,7 +1270,9 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
return -EINVAL;
if ((sa2 = ext_hdrs[SADB_X_EXT_SA2-1]) != NULL) {
- mode = sa2->sadb_x_sa2_mode - 1;
+ mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
+ if (mode < 0)
+ return -EINVAL;
reqid = sa2->sadb_x_sa2_reqid;
} else {
mode = 0;
@@ -1756,6 +1795,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct sockaddr_in6 *sin6;
#endif
+ int mode;
if (xp->xfrm_nr >= XFRM_MAX_DEPTH)
return -ELOOP;
@@ -1764,7 +1804,9 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
return -EINVAL;
t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */
- t->mode = rq->sadb_x_ipsecrequest_mode-1;
+ if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0)
+ return -EINVAL;
+ t->mode = mode;
if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE)
t->optional = 1;
else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
@@ -1877,7 +1919,7 @@ static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
return skb;
}
-static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir)
+static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir)
{
struct sadb_msg *hdr;
struct sadb_address *addr;
@@ -2014,6 +2056,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
struct sadb_x_ipsecrequest *rq;
struct xfrm_tmpl *t = xp->xfrm_vec + i;
int req_size;
+ int mode;
req_size = sizeof(struct sadb_x_ipsecrequest);
if (t->mode == XFRM_MODE_TUNNEL)
@@ -2027,7 +2070,9 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
memset(rq, 0, sizeof(*rq));
rq->sadb_x_ipsecrequest_len = req_size;
rq->sadb_x_ipsecrequest_proto = t->id.proto;
- rq->sadb_x_ipsecrequest_mode = t->mode+1;
+ if ((mode = pfkey_mode_from_xfrm(t->mode)) < 0)
+ return -EINVAL;
+ rq->sadb_x_ipsecrequest_mode = mode;
rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_REQUIRE;
if (t->reqid)
rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_UNIQUE;
@@ -2089,6 +2134,8 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
hdr->sadb_msg_len = size / sizeof(uint64_t);
hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
+
+ return 0;
}
static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
@@ -2102,7 +2149,9 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c
err = PTR_ERR(out_skb);
goto out;
}
- pfkey_xfrm_policy2msg(out_skb, xp, dir);
+ err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
+ if (err < 0)
+ return err;
out_hdr = (struct sadb_msg *) out_skb->data;
out_hdr->sadb_msg_version = PF_KEY_V2;
@@ -2327,7 +2376,9 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb
err = PTR_ERR(out_skb);
goto out;
}
- pfkey_xfrm_policy2msg(out_skb, xp, dir);
+ err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
+ if (err < 0)
+ goto out;
out_hdr = (struct sadb_msg *) out_skb->data;
out_hdr->sadb_msg_version = hdr->sadb_msg_version;
@@ -2409,6 +2460,7 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
{
int err;
struct sadb_x_ipsecrequest *rq2;
+ int mode;
if (len <= sizeof(struct sadb_x_ipsecrequest) ||
len < rq1->sadb_x_ipsecrequest_len)
@@ -2439,7 +2491,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
return -EINVAL;
m->proto = rq1->sadb_x_ipsecrequest_proto;
- m->mode = rq1->sadb_x_ipsecrequest_mode - 1;
+ if ((mode = pfkey_mode_to_xfrm(rq1->sadb_x_ipsecrequest_mode)) < 0)
+ return -EINVAL;
+ m->mode = mode;
m->reqid = rq1->sadb_x_ipsecrequest_reqid;
return ((int)(rq1->sadb_x_ipsecrequest_len +
@@ -2579,12 +2633,15 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
struct pfkey_dump_data *data = ptr;
struct sk_buff *out_skb;
struct sadb_msg *out_hdr;
+ int err;
out_skb = pfkey_xfrm_policy2msg_prep(xp);
if (IS_ERR(out_skb))
return PTR_ERR(out_skb);
- pfkey_xfrm_policy2msg(out_skb, xp, dir);
+ err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
+ if (err < 0)
+ return err;
out_hdr = (struct sadb_msg *) out_skb->data;
out_hdr->sadb_msg_version = data->hdr->sadb_msg_version;
@@ -3513,7 +3570,10 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
for (i = 0, mp = m; i < num_bundles; i++, mp++) {
/* old ipsecrequest */
- if (set_ipsecrequest(skb, mp->proto, mp->mode + 1,
+ int mode = pfkey_mode_from_xfrm(mp->mode);
+ if (mode < 0)
+ return -EINVAL;
+ if (set_ipsecrequest(skb, mp->proto, mode,
(mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE),
mp->reqid, mp->old_family,
&mp->old_saddr, &mp->old_daddr) < 0) {
@@ -3521,7 +3581,7 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
}
/* new ipsecrequest */
- if (set_ipsecrequest(skb, mp->proto, mp->mode + 1,
+ if (set_ipsecrequest(skb, mp->proto, mode,
(mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE),
mp->reqid, mp->new_family,
&mp->new_saddr, &mp->new_daddr) < 0) {
@@ -3607,7 +3667,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
copied = len;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
if (err)
goto out_free;
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index d12413cff5bd..d4b13a031fd5 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -160,8 +160,14 @@ static struct packet_type llc_tr_packet_type = {
static int __init llc_init(void)
{
- if (dev_base->next)
- memcpy(llc_station_mac_sa, dev_base->next->dev_addr, ETH_ALEN);
+ struct net_device *dev;
+
+ dev = first_net_device();
+ if (dev != NULL)
+ dev = next_net_device(dev);
+
+ if (dev != NULL)
+ memcpy(llc_station_mac_sa, dev->dev_addr, ETH_ALEN);
else
memset(llc_station_mac_sa, 0, ETH_ALEN);
dev_add_pack(&llc_packet_type);
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index b3f65d1e80b1..099ed8fec145 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -112,7 +112,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, llc_len)))
return 0;
- skb->h.raw += llc_len;
+ skb->transport_header += llc_len;
skb_pull(skb, llc_len);
if (skb->protocol == htons(ETH_P_802_2)) {
__be16 pdulen = eth_hdr(skb)->h_proto;
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index f4291f349e92..754f4fedc852 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -41,7 +41,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
struct net_device *dev = skb->dev;
struct trh_hdr *trh;
- skb->mac.raw = skb_push(skb, sizeof(*trh));
+ skb_push(skb, sizeof(*trh));
+ skb_reset_mac_header(skb);
trh = tr_hdr(skb);
trh->ac = AC;
trh->fc = LLC_FRAME;
@@ -52,7 +53,7 @@ int llc_mac_hdr_init(struct sk_buff *skb,
if (da) {
memcpy(trh->daddr, da, dev->addr_len);
tr_source_route(skb, trh, dev);
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
}
break;
}
@@ -62,7 +63,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
unsigned short len = skb->len;
struct ethhdr *eth;
- skb->mac.raw = skb_push(skb, sizeof(*eth));
+ skb_push(skb, sizeof(*eth));
+ skb_reset_mac_header(skb);
eth = eth_hdr(skb);
eth->h_proto = htons(len);
memcpy(eth->h_dest, da, ETH_ALEN);
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 2615dc81aa36..2525165e2e8f 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -36,11 +36,12 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev)
struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
if (skb) {
+ skb_reset_mac_header(skb);
skb_reserve(skb, 50);
- skb->nh.raw = skb->h.raw = skb->data;
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
skb->protocol = htons(ETH_P_802_2);
skb->dev = dev;
- skb->mac.raw = skb->head;
if (sk != NULL)
skb_set_owner_w(skb, sk);
}
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
new file mode 100644
index 000000000000..6fffb3845ab6
--- /dev/null
+++ b/net/mac80211/Kconfig
@@ -0,0 +1,78 @@
+config MAC80211
+ tristate "Generic IEEE 802.11 Networking Stack (mac80211)"
+ depends on EXPERIMENTAL
+ select CRYPTO
+ select CRYPTO_ECB
+ select CRYPTO_ARC4
+ select CRYPTO_AES
+ select CRC32
+ select WIRELESS_EXT
+ select CFG80211
+ select NET_SCH_FIFO
+ ---help---
+ This option enables the hardware independent IEEE 802.11
+ networking stack.
+
+config MAC80211_LEDS
+ bool "Enable LED triggers"
+ depends on MAC80211 && LEDS_TRIGGERS
+ ---help---
+ This option enables a few LED triggers for different
+ packet receive/transmit events.
+
+config MAC80211_DEBUGFS
+ bool "Export mac80211 internals in DebugFS"
+ depends on MAC80211 && DEBUG_FS
+ ---help---
+ Select this to see extensive information about
+ the internal state of mac80211 in debugfs.
+
+ Say N unless you know you need this.
+
+config MAC80211_DEBUG
+ bool "Enable debugging output"
+ depends on MAC80211
+ ---help---
+ This option will enable debug tracing output for the
+ ieee80211 network stack.
+
+ If you are not trying to debug or develop the ieee80211
+ subsystem, you most likely want to say N here.
+
+config MAC80211_VERBOSE_DEBUG
+ bool "Verbose debugging output"
+ depends on MAC80211_DEBUG
+
+config MAC80211_LOWTX_FRAME_DUMP
+ bool "Debug frame dumping"
+ depends on MAC80211_DEBUG
+ ---help---
+ Selecting this option will cause the stack to
+ print a message for each frame that is handed
+ to the lowlevel driver for transmission. This
+ message includes all MAC addresses and the
+ frame control field.
+
+ If unsure, say N and insert the debugging code
+ you require into the driver you are debugging.
+
+config TKIP_DEBUG
+ bool "TKIP debugging"
+ depends on MAC80211_DEBUG
+
+config MAC80211_DEBUG_COUNTERS
+ bool "Extra statistics for TX/RX debugging"
+ depends on MAC80211_DEBUG
+
+config MAC80211_IBSS_DEBUG
+ bool "Support for IBSS testing"
+ depends on MAC80211_DEBUG
+ ---help---
+ Say Y here if you intend to debug the IBSS code.
+
+config MAC80211_VERBOSE_PS_DEBUG
+ bool "Verbose powersave mode debugging"
+ depends on MAC80211_DEBUG
+ ---help---
+ Say Y here to print out verbose powersave
+ mode debug messages.
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
new file mode 100644
index 000000000000..e9738dad2d7c
--- /dev/null
+++ b/net/mac80211/Makefile
@@ -0,0 +1,20 @@
+obj-$(CONFIG_MAC80211) += mac80211.o rc80211_simple.o
+
+mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o
+mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o
+
+mac80211-objs := \
+ ieee80211.o \
+ ieee80211_ioctl.o \
+ sta_info.o \
+ wep.o \
+ wpa.o \
+ ieee80211_sta.o \
+ ieee80211_iface.o \
+ ieee80211_rate.o \
+ michael.o \
+ tkip.o \
+ aes_ccm.o \
+ wme.o \
+ ieee80211_cfg.o \
+ $(mac80211-objs-y)
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
new file mode 100644
index 000000000000..e55569bee7d0
--- /dev/null
+++ b/net/mac80211/aes_ccm.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2003-2004, Instant802 Networks, Inc.
+ * Copyright 2005-2006, Devicescape Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <asm/scatterlist.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_key.h"
+#include "aes_ccm.h"
+
+
+/*
+ * Encrypt one block from pt into ct using tfm.
+ *
+ * Thin wrapper around crypto_cipher_encrypt_one(). Note the argument
+ * order: this helper takes (pt, ct) while the wrapped call is handed
+ * (ct, pt). Callers in this file also pass the same buffer for both.
+ */
+static void ieee80211_aes_encrypt(struct crypto_cipher *tfm,
+ const u8 pt[16], u8 ct[16])
+{
+ crypto_cipher_encrypt_one(tfm, ct, pt);
+}
+
+
+/*
+ * Setup shared by ieee80211_aes_ccm_encrypt() and
+ * ieee80211_aes_ccm_decrypt().
+ *
+ * Encrypts b_0 into b, then folds the two AES_BLOCK_LEN sized blocks of
+ * aad into the running value: the first block is XORed with b and
+ * encrypted back into b, the second is XORed with b and encrypted into a.
+ * Afterwards b_0 is masked and its last two bytes are cleared before it
+ * is encrypted into s_0.
+ *
+ * Both b_0 and the buffer behind aad are modified in place. The encrypt
+ * path passes the same buffer for b and a.
+ */
+static inline void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *b_0, u8 *aad,
+ u8 *b, u8 *s_0, u8 *a)
+{
+ int i;
+
+ ieee80211_aes_encrypt(tfm, b_0, b);
+
+ /* Extra Authenticate-only data (always two AES blocks) */
+ for (i = 0; i < AES_BLOCK_LEN; i++)
+ aad[i] ^= b[i];
+ ieee80211_aes_encrypt(tfm, aad, b);
+
+ /* Step to the second block of authenticate-only data */
+ aad += AES_BLOCK_LEN;
+
+ for (i = 0; i < AES_BLOCK_LEN; i++)
+ aad[i] ^= b[i];
+ ieee80211_aes_encrypt(tfm, aad, a);
+
+ /* Mask out bits from auth-only-b_0 */
+ b_0[0] &= 0x07;
+
+ /* S_0 is used to encrypt T (= MIC) */
+ b_0[14] = 0;
+ b_0[15] = 0;
+ ieee80211_aes_encrypt(tfm, b_0, s_0);
+}
+
+
+/*
+ * Encrypt data_len bytes from data into cdata and write CCMP_MIC_LEN
+ * bytes of MIC to mic.
+ *
+ * scratch is split into three AES_BLOCK_LEN sized regions (b, s_0 and e),
+ * so it has to provide at least 3 * AES_BLOCK_LEN bytes. b_0 and aad are
+ * modified during the call; bytes 14 and 15 of b_0 carry the block index.
+ */
+void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
+ u8 *b_0, u8 *aad, u8 *data, size_t data_len,
+ u8 *cdata, u8 *mic)
+{
+ int i, j, last_len, num_blocks;
+ u8 *pos, *cpos, *b, *s_0, *e;
+
+ b = scratch;
+ s_0 = scratch + AES_BLOCK_LEN;
+ e = scratch + 2 * AES_BLOCK_LEN;
+
+ /* Round up; last_len is the size of a trailing partial block, 0 if none */
+ num_blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN;
+ last_len = data_len % AES_BLOCK_LEN;
+ /* b is passed twice: it also serves as the running authentication block */
+ aes_ccm_prepare(tfm, b_0, aad, b, s_0, b);
+
+ /* Process payload blocks */
+ pos = data;
+ cpos = cdata;
+ for (j = 1; j <= num_blocks; j++) {
+ int blen = (j == num_blocks && last_len) ?
+ last_len : AES_BLOCK_LEN;
+
+ /* Authentication followed by encryption */
+ for (i = 0; i < blen; i++)
+ b[i] ^= pos[i];
+ ieee80211_aes_encrypt(tfm, b, b);
+
+ /* Store the 16-bit block index, high byte first, then derive e */
+ b_0[14] = (j >> 8) & 0xff;
+ b_0[15] = j & 0xff;
+ ieee80211_aes_encrypt(tfm, b_0, e);
+ for (i = 0; i < blen; i++)
+ *cpos++ = *pos++ ^ e[i];
+ }
+
+ /* The MIC is the final authentication block XORed with s_0 */
+ for (i = 0; i < CCMP_MIC_LEN; i++)
+ mic[i] = b[i] ^ s_0[i];
+}
+
+
+/*
+ * Decrypt data_len bytes from cdata into data and verify the received
+ * MIC.
+ *
+ * scratch is split into three AES_BLOCK_LEN sized regions (b, s_0 and a),
+ * so it has to provide at least 3 * AES_BLOCK_LEN bytes. b_0 and aad are
+ * modified during the call; bytes 14 and 15 of b_0 carry the block index.
+ *
+ * Returns 0 if the CCMP_MIC_LEN bytes at mic match the computed value,
+ * -1 otherwise. The output buffer has been written in either case.
+ */
+int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
+			      u8 *b_0, u8 *aad, u8 *cdata, size_t data_len,
+			      u8 *mic, u8 *data)
+{
+	int i, j, last_len, num_blocks;
+	u8 *pos, *cpos, *b, *s_0, *a;
+	u8 mismatch = 0;
+
+	b = scratch;
+	s_0 = scratch + AES_BLOCK_LEN;
+	a = scratch + 2 * AES_BLOCK_LEN;
+
+	/* Round up; last_len is the size of a trailing partial block. */
+	num_blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN;
+	last_len = data_len % AES_BLOCK_LEN;
+	aes_ccm_prepare(tfm, b_0, aad, b, s_0, a);
+
+	/* Process payload blocks */
+	cpos = cdata;
+	pos = data;
+	for (j = 1; j <= num_blocks; j++) {
+		int blen = (j == num_blocks && last_len) ?
+			last_len : AES_BLOCK_LEN;
+
+		/* Decryption followed by authentication */
+		b_0[14] = (j >> 8) & 0xff;
+		b_0[15] = j & 0xff;
+		ieee80211_aes_encrypt(tfm, b_0, b);
+		for (i = 0; i < blen; i++) {
+			*pos = *cpos++ ^ b[i];
+			a[i] ^= *pos++;
+		}
+
+		ieee80211_aes_encrypt(tfm, a, a);
+	}
+
+	/*
+	 * Compare every MIC byte and only test the accumulated difference
+	 * afterwards, so that the time taken does not depend on the
+	 * position of the first mismatching byte.
+	 */
+	for (i = 0; i < CCMP_MIC_LEN; i++)
+		mismatch |= mic[i] ^ s_0[i] ^ a[i];
+
+	return mismatch ? -1 : 0;
+}
+
+
+/*
+ * Allocate an "aes" cipher and load ALG_CCMP_KEY_LEN bytes of key into it.
+ *
+ * Returns the cipher handle, or NULL if the cipher could not be allocated
+ * or the key was rejected. A non-NULL result is released with
+ * ieee80211_aes_key_free().
+ */
+struct crypto_cipher * ieee80211_aes_key_setup_encrypt(const u8 key[])
+{
+	struct crypto_cipher *tfm;
+
+	tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		return NULL;
+
+	/* Never hand back a cipher that has no key loaded. */
+	if (crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN)) {
+		crypto_free_cipher(tfm);
+		return NULL;
+	}
+
+	return tfm;
+}
+
+
+/*
+ * Release a cipher obtained from ieee80211_aes_key_setup_encrypt().
+ * A NULL tfm is accepted and ignored.
+ */
+void ieee80211_aes_key_free(struct crypto_cipher *tfm)
+{
+ if (tfm)
+ crypto_free_cipher(tfm);
+}
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
new file mode 100644
index 000000000000..885f19030b29
--- /dev/null
+++ b/net/mac80211/aes_ccm.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2003-2004, Instant802 Networks, Inc.
+ * Copyright 2006, Devicescape Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef AES_CCM_H
+#define AES_CCM_H
+
+#include <linux/crypto.h>
+
+#define AES_BLOCK_LEN 16
+
+struct crypto_cipher * ieee80211_aes_key_setup_encrypt(const u8 key[]);
+void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
+ u8 *b_0, u8 *aad, u8 *data, size_t data_len,
+ u8 *cdata, u8 *mic);
+int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
+ u8 *b_0, u8 *aad, u8 *cdata, size_t data_len,
+ u8 *mic, u8 *data);
+void ieee80211_aes_key_free(struct crypto_cipher *tfm);
+
+#endif /* AES_CCM_H */
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
new file mode 100644
index 000000000000..bb6c0feb2d48
--- /dev/null
+++ b/net/mac80211/debugfs.c
@@ -0,0 +1,433 @@
+/*
+ * mac80211 debugfs for wireless PHYs
+ *
+ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * GPLv2
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/rtnetlink.h>
+#include "ieee80211_i.h"
+#include "ieee80211_rate.h"
+#include "debugfs.h"
+
+/*
+ * Generic open handler for the debugfs files in this directory: copies
+ * the inode's i_private pointer into file->private_data so that the read
+ * handlers can pick it up. Always returns 0.
+ */
+int mac80211_open_file_generic(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return 0;
+}
+
+/*
+ * Map a MODE_* value to a human readable string literal; any value not
+ * listed yields "UNKNOWN".
+ */
+static const char *ieee80211_mode_str(int mode)
+{
+	const char *name = "UNKNOWN";
+
+	switch (mode) {
+	case MODE_IEEE80211A:
+		name = "IEEE 802.11a";
+		break;
+	case MODE_IEEE80211B:
+		name = "IEEE 802.11b";
+		break;
+	case MODE_IEEE80211G:
+		name = "IEEE 802.11g";
+		break;
+	case MODE_ATHEROS_TURBO:
+		name = "Atheros Turbo (5 GHz)";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/*
+ * debugfs read handler: emits one line per entry of local->modes_list,
+ * each holding the name of that mode. Output that does not fit into the
+ * 150 byte buffer is cut off.
+ */
+static ssize_t modes_read(struct file *file, char __user *userbuf,
+			  size_t count, loff_t *ppos)
+{
+	struct ieee80211_local *local = file->private_data;
+	struct ieee80211_hw_mode *hw_mode;
+	char text[150];
+	int used = 0;
+
+	/* FIXME: locking! */
+	list_for_each_entry(hw_mode, &local->modes_list, list)
+		used += scnprintf(text + used, sizeof(text) - used, "%s\n",
+				  ieee80211_mode_str(hw_mode->mode));
+
+	return simple_read_from_buffer(userbuf, count, ppos, text, used);
+}
+
+static const struct file_operations modes_ops = {
+ .read = modes_read,
+ .open = mac80211_open_file_generic,
+};
+
+/*
+ * DEBUGFS_READONLY_FILE(name, buflen, fmt, value...)
+ *
+ * Generates a read handler name_read() and a file_operations table
+ * name_ops for a read-only debugfs file. The handler formats value with
+ * fmt, followed by a newline, into a stack buffer of buflen bytes and
+ * copies the result to user space. The value expressions are evaluated
+ * inside the handler, where "local" is the struct ieee80211_local taken
+ * from file->private_data.
+ */
+#define DEBUGFS_READONLY_FILE(name, buflen, fmt, value...) \
+static ssize_t name## _read(struct file *file, char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ struct ieee80211_local *local = file->private_data; \
+ char buf[buflen]; \
+ int res; \
+ \
+ res = scnprintf(buf, buflen, fmt "\n", ##value); \
+ return simple_read_from_buffer(userbuf, count, ppos, buf, res); \
+} \
+ \
+static const struct file_operations name## _ops = { \
+ .read = name## _read, \
+ .open = mac80211_open_file_generic, \
+};
+
+/*
+ * DEBUGFS_ADD(name) creates the read-only (0444) file "name" below phyd,
+ * backed by name_ops, and stores its dentry in local->debugfs.name.
+ * It relies on "local" and "phyd" being in scope at the point of use.
+ */
+#define DEBUGFS_ADD(name) \
+ local->debugfs.name = debugfs_create_file(#name, 0444, phyd, \
+ local, &name## _ops);
+
+/*
+ * DEBUGFS_DEL(name) removes that file again and clears the stored dentry.
+ * NOTE(review): this expands to two bare statements, so it is only safe
+ * where a full statement sequence is allowed (not as the sole body of an
+ * unbraced if/else) - confirm against the callers.
+ */
+#define DEBUGFS_DEL(name) \
+ debugfs_remove(local->debugfs.name); \
+ local->debugfs.name = NULL;
+
+
+/* One read handler plus file_operations table per exported value. */
+DEBUGFS_READONLY_FILE(channel, 20, "%d",
+		      local->hw.conf.channel);
+DEBUGFS_READONLY_FILE(frequency, 20, "%d",
+		      local->hw.conf.freq);
+DEBUGFS_READONLY_FILE(radar_detect, 20, "%d",
+		      local->hw.conf.radar_detect);
+DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d",
+		      local->hw.conf.antenna_sel_tx);
+DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d",
+		      local->hw.conf.antenna_sel_rx);
+DEBUGFS_READONLY_FILE(bridge_packets, 20, "%d",
+		      local->bridge_packets);
+DEBUGFS_READONLY_FILE(key_tx_rx_threshold, 20, "%d",
+		      local->key_tx_rx_threshold);
+DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d",
+		      local->rts_threshold);
+DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d",
+		      local->fragmentation_threshold);
+DEBUGFS_READONLY_FILE(short_retry_limit, 20, "%d",
+		      local->short_retry_limit);
+DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d",
+		      local->long_retry_limit);
+DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d",
+		      local->total_ps_buffered);
+DEBUGFS_READONLY_FILE(mode, 20, "%s",
+		      ieee80211_mode_str(local->hw.conf.phymode));
+DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x",
+		      local->wep_iv & 0xffffff);
+/*
+ * Printed as "<whole>.<fraction> dBm": the fractional digit is the
+ * remainder of the division by 10, so it needs '%', not a bit mask.
+ */
+DEBUGFS_READONLY_FILE(tx_power_reduction, 20, "%d.%d dBm",
+		      local->hw.conf.tx_power_reduction / 10,
+		      local->hw.conf.tx_power_reduction % 10);
+DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s",
+		      local->rate_ctrl ? local->rate_ctrl->ops->name : "<unset>");
+
+/* statistics stuff */
+
+/*
+ * Take the RTNL lock for an operation on local.
+ *
+ * Returns 0 with the lock held if the device is in state
+ * IEEE80211_DEV_REGISTERED; otherwise the lock is dropped again and
+ * -ENODEV is returned.
+ */
+static inline int rtnl_lock_local(struct ieee80211_local *local)
+{
+	rtnl_lock();
+	if (likely(local->reg_state == IEEE80211_DEV_REGISTERED))
+		return 0;
+
+	rtnl_unlock();
+	return -ENODEV;
+}
+
+#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \
+ DEBUGFS_READONLY_FILE(stats_ ##name, buflen, fmt, ##value)
+
+/*
+ * Common read path for the low-level driver statistics files.
+ *
+ * Fetches the statistics through the driver's get_stats() callback while
+ * holding the RTNL, lets printvalue() format one counter into a local
+ * buffer and copies the result to user space.
+ *
+ * Returns the number of bytes copied, -EOPNOTSUPP if the driver has no
+ * get_stats() callback, -ENODEV if the device is not registered, or the
+ * error reported by get_stats().
+ */
+static ssize_t format_devstat_counter(struct ieee80211_local *local,
+	char __user *userbuf,
+	size_t count, loff_t *ppos,
+	int (*printvalue)(struct ieee80211_low_level_stats *stats, char *buf,
+			  int buflen))
+{
+	struct ieee80211_low_level_stats stats;
+	char buf[20];
+	int res;
+
+	if (!local->ops->get_stats)
+		return -EOPNOTSUPP;
+
+	res = rtnl_lock_local(local);
+	if (res)
+		return res;
+
+	res = local->ops->get_stats(local_to_hw(local), &stats);
+	rtnl_unlock();
+	/*
+	 * On failure buf was never filled in and res is not a length;
+	 * report the error instead of copying the buffer out.
+	 */
+	if (res)
+		return res;
+
+	res = printvalue(&stats, buf, sizeof(buf));
+	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
+}
+
+/*
+ * DEBUGFS_DEVSTATS_FILE(name)
+ *
+ * Generates the pieces for one low-level statistics file: a formatter
+ * print_devstats_name() that prints the field "name" of
+ * struct ieee80211_low_level_stats as "%u\n", a read handler
+ * stats_name_read() that hands it to format_devstat_counter(), and the
+ * file_operations table stats_name_ops.
+ */
+#define DEBUGFS_DEVSTATS_FILE(name) \
+static int print_devstats_##name(struct ieee80211_low_level_stats *stats,\
+ char *buf, int buflen) \
+{ \
+ return scnprintf(buf, buflen, "%u\n", stats->name); \
+} \
+static ssize_t stats_ ##name## _read(struct file *file, \
+ char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ return format_devstat_counter(file->private_data, \
+ userbuf, \
+ count, \
+ ppos, \
+ print_devstats_##name); \
+} \
+ \
+static const struct file_operations stats_ ##name## _ops = { \
+ .read = stats_ ##name## _read, \
+ .open = mac80211_open_file_generic, \
+};
+
+/*
+ * DEBUGFS_STATS_ADD(name) creates the read-only (0444) file "name" below
+ * statsd, backed by stats_name_ops, and stores its dentry in
+ * local->debugfs.stats.name. It relies on "local" and "statsd" being in
+ * scope at the point of use.
+ */
+#define DEBUGFS_STATS_ADD(name) \
+ local->debugfs.stats.name = debugfs_create_file(#name, 0444, statsd,\
+ local, &stats_ ##name## _ops);
+
+/*
+ * DEBUGFS_STATS_DEL(name) removes that file again and clears the stored
+ * dentry.
+ * NOTE(review): expands to two bare statements; only safe where a full
+ * statement sequence is allowed - confirm against the callers.
+ */
+#define DEBUGFS_STATS_DEL(name) \
+ debugfs_remove(local->debugfs.stats.name); \
+ local->debugfs.stats.name = NULL;
+
+DEBUGFS_STATS_FILE(transmitted_fragment_count, 20, "%u",
+ local->dot11TransmittedFragmentCount);
+DEBUGFS_STATS_FILE(multicast_transmitted_frame_count, 20, "%u",
+ local->dot11MulticastTransmittedFrameCount);
+DEBUGFS_STATS_FILE(failed_count, 20, "%u",
+ local->dot11FailedCount);
+DEBUGFS_STATS_FILE(retry_count, 20, "%u",
+ local->dot11RetryCount);
+DEBUGFS_STATS_FILE(multiple_retry_count, 20, "%u",
+ local->dot11MultipleRetryCount);
+DEBUGFS_STATS_FILE(frame_duplicate_count, 20, "%u",
+ local->dot11FrameDuplicateCount);
+DEBUGFS_STATS_FILE(received_fragment_count, 20, "%u",
+ local->dot11ReceivedFragmentCount);
+DEBUGFS_STATS_FILE(multicast_received_frame_count, 20, "%u",
+ local->dot11MulticastReceivedFrameCount);
+DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u",
+ local->dot11TransmittedFrameCount);
+DEBUGFS_STATS_FILE(wep_undecryptable_count, 20, "%u",
+ local->dot11WEPUndecryptableCount);
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u",
+ local->tx_handlers_drop);
+DEBUGFS_STATS_FILE(tx_handlers_queued, 20, "%u",
+ local->tx_handlers_queued);
+DEBUGFS_STATS_FILE(tx_handlers_drop_unencrypted, 20, "%u",
+ local->tx_handlers_drop_unencrypted);
+DEBUGFS_STATS_FILE(tx_handlers_drop_fragment, 20, "%u",
+ local->tx_handlers_drop_fragment);
+DEBUGFS_STATS_FILE(tx_handlers_drop_wep, 20, "%u",
+ local->tx_handlers_drop_wep);
+DEBUGFS_STATS_FILE(tx_handlers_drop_not_assoc, 20, "%u",
+ local->tx_handlers_drop_not_assoc);
+DEBUGFS_STATS_FILE(tx_handlers_drop_unauth_port, 20, "%u",
+ local->tx_handlers_drop_unauth_port);
+DEBUGFS_STATS_FILE(rx_handlers_drop, 20, "%u",
+ local->rx_handlers_drop);
+DEBUGFS_STATS_FILE(rx_handlers_queued, 20, "%u",
+ local->rx_handlers_queued);
+DEBUGFS_STATS_FILE(rx_handlers_drop_nullfunc, 20, "%u",
+ local->rx_handlers_drop_nullfunc);
+DEBUGFS_STATS_FILE(rx_handlers_drop_defrag, 20, "%u",
+ local->rx_handlers_drop_defrag);
+DEBUGFS_STATS_FILE(rx_handlers_drop_short, 20, "%u",
+ local->rx_handlers_drop_short);
+DEBUGFS_STATS_FILE(rx_handlers_drop_passive_scan, 20, "%u",
+ local->rx_handlers_drop_passive_scan);
+DEBUGFS_STATS_FILE(tx_expand_skb_head, 20, "%u",
+ local->tx_expand_skb_head);
+DEBUGFS_STATS_FILE(tx_expand_skb_head_cloned, 20, "%u",
+ local->tx_expand_skb_head_cloned);
+DEBUGFS_STATS_FILE(rx_expand_skb_head, 20, "%u",
+ local->rx_expand_skb_head);
+DEBUGFS_STATS_FILE(rx_expand_skb_head2, 20, "%u",
+ local->rx_expand_skb_head2);
+DEBUGFS_STATS_FILE(rx_handlers_fragments, 20, "%u",
+ local->rx_handlers_fragments);
+DEBUGFS_STATS_FILE(tx_status_drop, 20, "%u",
+ local->tx_status_drop);
+
+/*
+ * debugfs read handler: prints local->wme_rx_queue[] as one "%u" line
+ * per entry, NUM_RX_DATA_QUEUES entries in total.
+ */
+static ssize_t stats_wme_rx_queue_read(struct file *file,
+				       char __user *userbuf,
+				       size_t count, loff_t *ppos)
+{
+	struct ieee80211_local *local = file->private_data;
+	char text[NUM_RX_DATA_QUEUES*15];
+	int queue, used = 0;
+
+	for (queue = 0; queue < NUM_RX_DATA_QUEUES; queue++)
+		used += scnprintf(text + used, sizeof(text) - used,
+				  "%u\n", local->wme_rx_queue[queue]);
+
+	return simple_read_from_buffer(userbuf, count, ppos, text, used);
+}
+
+static const struct file_operations stats_wme_rx_queue_ops = {
+ .read = stats_wme_rx_queue_read,
+ .open = mac80211_open_file_generic,
+};
+
+/*
+ * Read handler for the wme_tx_queue statistics file: prints one "%u" line
+ * per TX data queue from local->wme_tx_queue[] and copies the requested
+ * window of that text to user space.
+ */
+static ssize_t stats_wme_tx_queue_read(struct file *file,
+				       char __user *userbuf,
+				       size_t count, loff_t *ppos)
+{
+	struct ieee80211_local *local = file->private_data;
+	char text[NUM_TX_DATA_QUEUES*15];
+	size_t used = 0;
+	int queue;
+
+	/* scnprintf() returns what was actually stored, so "used" stays in bounds */
+	for (queue = 0; queue < NUM_TX_DATA_QUEUES; queue++)
+		used += scnprintf(text + used, sizeof(text) - used,
+				  "%u\n", local->wme_tx_queue[queue]);
+
+	return simple_read_from_buffer(userbuf, count, ppos, text, used);
+}
+
+static const struct file_operations stats_wme_tx_queue_ops = {
+	.open = mac80211_open_file_generic,
+	.read = stats_wme_tx_queue_read,
+};
+#endif
+/*
+ * Stats entries for the four dot11 counters. DEBUGFS_DEVSTATS_FILE is
+ * defined outside this excerpt; the entries are registered in
+ * debugfs_hw_add() through DEBUGFS_STATS_ADD.
+ */
+DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount);
+DEBUGFS_DEVSTATS_FILE(dot11RTSFailureCount);
+DEBUGFS_DEVSTATS_FILE(dot11FCSErrorCount);
+DEBUGFS_DEVSTATS_FILE(dot11RTSSuccessCount);
+
+/*
+ * debugfs_hw_add - populate the wiphy's debugfs directory for this device.
+ *
+ * Creates the "stations" and "keys" directories, the DEBUGFS_ADD entries,
+ * and a "statistics" directory holding the DEBUGFS_STATS_ADD entries.
+ * Returns early when the wiphy has no debugfs directory, and skips all
+ * statistics entries when the "statistics" directory could not be created.
+ */
+void debugfs_hw_add(struct ieee80211_local *local)
+{
+ struct dentry *phyd = local->hw.wiphy->debugfsdir;
+ struct dentry *statsd;
+
+ if (!phyd)
+ return;
+
+ local->debugfs.stations = debugfs_create_dir("stations", phyd);
+ local->debugfs.keys = debugfs_create_dir("keys", phyd);
+
+ DEBUGFS_ADD(channel);
+ DEBUGFS_ADD(frequency);
+ DEBUGFS_ADD(radar_detect);
+ DEBUGFS_ADD(antenna_sel_tx);
+ DEBUGFS_ADD(antenna_sel_rx);
+ DEBUGFS_ADD(bridge_packets);
+ DEBUGFS_ADD(key_tx_rx_threshold);
+ DEBUGFS_ADD(rts_threshold);
+ DEBUGFS_ADD(fragmentation_threshold);
+ DEBUGFS_ADD(short_retry_limit);
+ DEBUGFS_ADD(long_retry_limit);
+ DEBUGFS_ADD(total_ps_buffered);
+ DEBUGFS_ADD(mode);
+ DEBUGFS_ADD(wep_iv);
+ DEBUGFS_ADD(tx_power_reduction);
+ DEBUGFS_ADD(modes);
+
+ statsd = debugfs_create_dir("statistics", phyd);
+ local->debugfs.statistics = statsd;
+
+ /* if the dir failed, don't put all the other things into the root! */
+ if (!statsd)
+ return;
+
+ DEBUGFS_STATS_ADD(transmitted_fragment_count);
+ DEBUGFS_STATS_ADD(multicast_transmitted_frame_count);
+ DEBUGFS_STATS_ADD(failed_count);
+ DEBUGFS_STATS_ADD(retry_count);
+ DEBUGFS_STATS_ADD(multiple_retry_count);
+ DEBUGFS_STATS_ADD(frame_duplicate_count);
+ DEBUGFS_STATS_ADD(received_fragment_count);
+ DEBUGFS_STATS_ADD(multicast_received_frame_count);
+ DEBUGFS_STATS_ADD(transmitted_frame_count);
+ DEBUGFS_STATS_ADD(wep_undecryptable_count);
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+ DEBUGFS_STATS_ADD(tx_handlers_drop);
+ DEBUGFS_STATS_ADD(tx_handlers_queued);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_fragment);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_wep);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port);
+ DEBUGFS_STATS_ADD(rx_handlers_drop);
+ DEBUGFS_STATS_ADD(rx_handlers_queued);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_defrag);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_short);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan);
+ DEBUGFS_STATS_ADD(tx_expand_skb_head);
+ DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned);
+ DEBUGFS_STATS_ADD(rx_expand_skb_head);
+ DEBUGFS_STATS_ADD(rx_expand_skb_head2);
+ DEBUGFS_STATS_ADD(rx_handlers_fragments);
+ DEBUGFS_STATS_ADD(tx_status_drop);
+ DEBUGFS_STATS_ADD(wme_tx_queue);
+ DEBUGFS_STATS_ADD(wme_rx_queue);
+#endif
+ DEBUGFS_STATS_ADD(dot11ACKFailureCount);
+ DEBUGFS_STATS_ADD(dot11RTSFailureCount);
+ DEBUGFS_STATS_ADD(dot11FCSErrorCount);
+ DEBUGFS_STATS_ADD(dot11RTSSuccessCount);
+}
+/*
+ * debugfs_hw_del - undo debugfs_hw_add().
+ *
+ * Deletes the DEBUGFS_DEL / DEBUGFS_STATS_DEL entries first, then removes
+ * the "statistics", "stations" and "keys" directories and clears the
+ * stored dentry pointers.
+ */
+void debugfs_hw_del(struct ieee80211_local *local)
+{
+ DEBUGFS_DEL(channel);
+ DEBUGFS_DEL(frequency);
+ DEBUGFS_DEL(radar_detect);
+ DEBUGFS_DEL(antenna_sel_tx);
+ DEBUGFS_DEL(antenna_sel_rx);
+ DEBUGFS_DEL(bridge_packets);
+ DEBUGFS_DEL(key_tx_rx_threshold);
+ DEBUGFS_DEL(rts_threshold);
+ DEBUGFS_DEL(fragmentation_threshold);
+ DEBUGFS_DEL(short_retry_limit);
+ DEBUGFS_DEL(long_retry_limit);
+ DEBUGFS_DEL(total_ps_buffered);
+ DEBUGFS_DEL(mode);
+ DEBUGFS_DEL(wep_iv);
+ DEBUGFS_DEL(tx_power_reduction);
+ DEBUGFS_DEL(modes);
+
+ DEBUGFS_STATS_DEL(transmitted_fragment_count);
+ DEBUGFS_STATS_DEL(multicast_transmitted_frame_count);
+ DEBUGFS_STATS_DEL(failed_count);
+ DEBUGFS_STATS_DEL(retry_count);
+ DEBUGFS_STATS_DEL(multiple_retry_count);
+ DEBUGFS_STATS_DEL(frame_duplicate_count);
+ DEBUGFS_STATS_DEL(received_fragment_count);
+ DEBUGFS_STATS_DEL(multicast_received_frame_count);
+ DEBUGFS_STATS_DEL(transmitted_frame_count);
+ DEBUGFS_STATS_DEL(wep_undecryptable_count);
+ DEBUGFS_STATS_DEL(num_scans); /* NOTE(review): debugfs_hw_add() has no DEBUGFS_STATS_ADD(num_scans) - confirm this entry is ever created */
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+ DEBUGFS_STATS_DEL(tx_handlers_drop);
+ DEBUGFS_STATS_DEL(tx_handlers_queued);
+ DEBUGFS_STATS_DEL(tx_handlers_drop_unencrypted);
+ DEBUGFS_STATS_DEL(tx_handlers_drop_fragment);
+ DEBUGFS_STATS_DEL(tx_handlers_drop_wep);
+ DEBUGFS_STATS_DEL(tx_handlers_drop_not_assoc);
+ DEBUGFS_STATS_DEL(tx_handlers_drop_unauth_port);
+ DEBUGFS_STATS_DEL(rx_handlers_drop);
+ DEBUGFS_STATS_DEL(rx_handlers_queued);
+ DEBUGFS_STATS_DEL(rx_handlers_drop_nullfunc);
+ DEBUGFS_STATS_DEL(rx_handlers_drop_defrag);
+ DEBUGFS_STATS_DEL(rx_handlers_drop_short);
+ DEBUGFS_STATS_DEL(rx_handlers_drop_passive_scan);
+ DEBUGFS_STATS_DEL(tx_expand_skb_head);
+ DEBUGFS_STATS_DEL(tx_expand_skb_head_cloned);
+ DEBUGFS_STATS_DEL(rx_expand_skb_head);
+ DEBUGFS_STATS_DEL(rx_expand_skb_head2);
+ DEBUGFS_STATS_DEL(rx_handlers_fragments);
+ DEBUGFS_STATS_DEL(tx_status_drop);
+ DEBUGFS_STATS_DEL(wme_tx_queue);
+ DEBUGFS_STATS_DEL(wme_rx_queue);
+#endif
+ DEBUGFS_STATS_DEL(dot11ACKFailureCount);
+ DEBUGFS_STATS_DEL(dot11RTSFailureCount);
+ DEBUGFS_STATS_DEL(dot11FCSErrorCount);
+ DEBUGFS_STATS_DEL(dot11RTSSuccessCount);
+
+ debugfs_remove(local->debugfs.statistics); /* directories go last, after the entries above */
+ local->debugfs.statistics = NULL;
+ debugfs_remove(local->debugfs.stations);
+ local->debugfs.stations = NULL;
+ debugfs_remove(local->debugfs.keys);
+ local->debugfs.keys = NULL;
+}
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
new file mode 100644
index 000000000000..dd2541935c27
--- /dev/null
+++ b/net/mac80211/debugfs.h
@@ -0,0 +1,16 @@
+#ifndef __MAC80211_DEBUGFS_H
+#define __MAC80211_DEBUGFS_H
+
+#ifdef CONFIG_MAC80211_DEBUGFS /* debugfs configured: declare the real functions */
+extern void debugfs_hw_add(struct ieee80211_local *local);
+extern void debugfs_hw_del(struct ieee80211_local *local);
+extern int mac80211_open_file_generic(struct inode *inode, struct file *file);
+#else /* not configured: empty inline stubs; mac80211_open_file_generic gets no stub */
+static inline void debugfs_hw_add(struct ieee80211_local *local)
+{
+ return;
+}
+static inline void debugfs_hw_del(struct ieee80211_local *local) {}
+#endif
+
+#endif /* __MAC80211_DEBUGFS_H */
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
new file mode 100644
index 000000000000..7d56dc9e7326
--- /dev/null
+++ b/net/mac80211/debugfs_key.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2003-2005 Devicescape Software, Inc.
+ * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/kobject.h>
+#include "ieee80211_i.h"
+#include "ieee80211_key.h"
+#include "debugfs.h"
+#include "debugfs_key.h"
+/*
+ * KEY_READ(name, buflen, format_string) defines key_<name>_read(): a read
+ * handler that takes the key from file->private_data, formats key-><name>
+ * into a buflen-byte stack buffer with scnprintf() and passes the result
+ * to simple_read_from_buffer(). KEY_READ_D(name) is the "%d\n" variant
+ * with a 20-byte buffer.
+ */
+#define KEY_READ(name, buflen, format_string) \
+static ssize_t key_##name##_read(struct file *file, \
+ char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ char buf[buflen]; \
+ struct ieee80211_key *key = file->private_data; \
+ int res = scnprintf(buf, buflen, format_string, key->name); \
+ return simple_read_from_buffer(userbuf, count, ppos, buf, res); \
+}
+#define KEY_READ_D(name) KEY_READ(name, 20, "%d\n")
+/*
+ * KEY_OPS(name) defines key_<name>_ops, a file_operations table using
+ * key_<name>_read() and mac80211_open_file_generic(). It has no trailing
+ * semicolon, so each use supplies its own.
+ * KEY_FILE(name, format) emits the KEY_READ_<format> handler followed by
+ * the matching KEY_OPS table.
+ */
+#define KEY_OPS(name) \
+static const struct file_operations key_ ##name## _ops = { \
+ .read = key_##name##_read, \
+ .open = mac80211_open_file_generic, \
+}
+
+#define KEY_FILE(name, format) \
+ KEY_READ_##format(name) \
+ KEY_OPS(name)
+
+KEY_FILE(keylen, D);
+KEY_FILE(force_sw_encrypt, D);
+KEY_FILE(keyidx, D);
+KEY_FILE(hw_key_idx, D);
+KEY_FILE(tx_rx_count, D);
+
+/*
+ * Read handler for the "algorithm" file: reports the key's cipher as
+ * "WEP", "TKIP" or "CCMP" followed by a newline. Any other value of
+ * key->alg reads as an empty file (returns 0).
+ */
+static ssize_t key_algorithm_read(struct file *file,
+				  char __user *userbuf,
+				  size_t count, loff_t *ppos)
+{
+	struct ieee80211_key *key = file->private_data;
+	const char *label = NULL;
+
+	if (key->alg == ALG_WEP)
+		label = "WEP\n";
+	else if (key->alg == ALG_TKIP)
+		label = "TKIP\n";
+	else if (key->alg == ALG_CCMP)
+		label = "CCMP\n";
+
+	/* unknown algorithm: nothing to show */
+	if (!label)
+		return 0;
+
+	return simple_read_from_buffer(userbuf, count, ppos,
+				       label, strlen(label));
+}
+KEY_OPS(algorithm);
+
+/*
+ * Read handler for the "tx_spec" file: shows the transmit counter of a key.
+ * WEP yields an empty line, TKIP yields "iv32 iv16" in hex, CCMP yields the
+ * six tx_pn bytes in hex. Any other algorithm reads as an empty file
+ * (returns 0).
+ */
+static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
+				size_t count, loff_t *ppos)
+{
+	const u8 *tpn;
+	char buf[20];
+	int len;
+	struct ieee80211_key *key = file->private_data;
+
+	/*
+	 * Every case must end in break: falling through would always reach
+	 * "default" and return 0, throwing away the text formatted above.
+	 */
+	switch (key->alg) {
+	case ALG_WEP:
+		len = scnprintf(buf, sizeof(buf), "\n");
+		break;
+	case ALG_TKIP:
+		len = scnprintf(buf, sizeof(buf), "%08x %04x\n",
+				key->u.tkip.iv32,
+				key->u.tkip.iv16);
+		break;
+	case ALG_CCMP:
+		tpn = key->u.ccmp.tx_pn;
+		len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
+				tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
+		break;
+	default:
+		return 0;
+	}
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+KEY_OPS(tx_spec);
+
+/*
+ * Read handler for the "rx_spec" file: shows the receive counters of a key.
+ * WEP yields a single empty line. TKIP yields one "iv32 iv16" hex line per
+ * RX data queue, CCMP one line of six rx_pn bytes in hex per RX data queue.
+ * Any other algorithm reads as an empty file (returns 0).
+ */
+static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
+				size_t count, loff_t *ppos)
+{
+	struct ieee80211_key *key = file->private_data;
+	char buf[14*NUM_RX_DATA_QUEUES+1], *p = buf;
+	int i, len;
+	const u8 *rpn;
+
+	/*
+	 * Every case must end in break: falling through would mix the
+	 * output of several algorithms in buf and then hit "default",
+	 * which returns 0 and discards it all.
+	 */
+	switch (key->alg) {
+	case ALG_WEP:
+		len = scnprintf(buf, sizeof(buf), "\n");
+		break;
+	case ALG_TKIP:
+		for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
+			p += scnprintf(p, sizeof(buf)+buf-p,
+				       "%08x %04x\n",
+				       key->u.tkip.iv32_rx[i],
+				       key->u.tkip.iv16_rx[i]);
+		len = p - buf;
+		break;
+	case ALG_CCMP:
+		for (i = 0; i < NUM_RX_DATA_QUEUES; i++) {
+			rpn = key->u.ccmp.rx_pn[i];
+			p += scnprintf(p, sizeof(buf)+buf-p,
+				       "%02x%02x%02x%02x%02x%02x\n",
+				       rpn[0], rpn[1], rpn[2],
+				       rpn[3], rpn[4], rpn[5]);
+		}
+		len = p - buf;
+		break;
+	default:
+		return 0;
+	}
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+KEY_OPS(rx_spec);
+
+/*
+ * Read handler for the "replays" file: prints key->u.ccmp.replays as "%u"
+ * for CCMP keys. Keys of any other algorithm read as an empty file.
+ */
+static ssize_t key_replays_read(struct file *file, char __user *userbuf,
+				size_t count, loff_t *ppos)
+{
+	struct ieee80211_key *key = file->private_data;
+	char text[20];
+	int n;
+
+	if (key->alg == ALG_CCMP) {
+		n = scnprintf(text, sizeof(text), "%u\n",
+			      key->u.ccmp.replays);
+		return simple_read_from_buffer(userbuf, count, ppos, text, n);
+	}
+
+	return 0;
+}
+KEY_OPS(replays);
+
+/*
+ * Read handler for the "key" file: dumps key->key[0..keylen-1] as two hex
+ * digits per byte, followed by a newline. The text is built in a temporary
+ * heap buffer of 2*keylen+2 bytes.
+ *
+ * Returns the simple_read_from_buffer() result, or -ENOMEM when the
+ * temporary buffer cannot be allocated.
+ */
+static ssize_t key_key_read(struct file *file, char __user *userbuf,
+			    size_t count, loff_t *ppos)
+{
+	struct ieee80211_key *key = file->private_data;
+	int i, res, bufsize = 2*key->keylen+2;
+	char *buf = kmalloc(bufsize, GFP_KERNEL);
+	char *p = buf;
+
+	/* kmalloc() can fail; writing through a NULL buf would oops */
+	if (!buf)
+		return -ENOMEM;
+
+	for (i = 0; i < key->keylen; i++)
+		p += scnprintf(p, bufsize+buf-p, "%02x", key->key[i]);
+	p += scnprintf(p, bufsize+buf-p, "\n");
+	res = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+	kfree(buf);
+	return res;
+}
+KEY_OPS(key);
+/*
+ * DEBUGFS_ADD(name): create file "<name>" (mode 0400) in key->debugfs.dir,
+ * passing key as the data pointer and key_<name>_ops as the operations,
+ * and store the resulting dentry in key->debugfs.<name>. Expects a
+ * variable called "key" in scope.
+ */
+#define DEBUGFS_ADD(name) \
+ key->debugfs.name = debugfs_create_file(#name, 0400,\
+ key->debugfs.dir, key, &key_##name##_ops);
+
+/*
+ * Create the debugfs directory for one key, named after key->keyidx, inside
+ * local->debugfs.keys, and fill it with the per-key files. Does nothing
+ * when the "keys" directory is missing or the key directory cannot be
+ * created.
+ */
+void ieee80211_debugfs_key_add(struct ieee80211_local *local,
+			       struct ieee80211_key *key)
+{
+	char dirname[20];
+
+	if (local->debugfs.keys == NULL)
+		return;
+
+	sprintf(dirname, "%d", key->keyidx);
+	key->debugfs.dir = debugfs_create_dir(dirname, local->debugfs.keys);
+	if (key->debugfs.dir == NULL)
+		return;
+
+	DEBUGFS_ADD(keylen);
+	DEBUGFS_ADD(force_sw_encrypt);
+	DEBUGFS_ADD(keyidx);
+	DEBUGFS_ADD(hw_key_idx);
+	DEBUGFS_ADD(tx_rx_count);
+	DEBUGFS_ADD(algorithm);
+	DEBUGFS_ADD(tx_spec);
+	DEBUGFS_ADD(rx_spec);
+	DEBUGFS_ADD(replays);
+	DEBUGFS_ADD(key);
+}
+/*
+ * DEBUGFS_DEL(name): remove key->debugfs.<name> and clear the pointer.
+ * Expands to two statements, so it must not be used as the body of an
+ * unbraced if/else or loop.
+ */
+#define DEBUGFS_DEL(name) \
+ debugfs_remove(key->debugfs.name); key->debugfs.name = NULL;
+
+/*
+ * Remove everything ieee80211_debugfs_key_add() and
+ * ieee80211_debugfs_key_sta_link() created for this key: the per-key files,
+ * the station symlink, and finally the key directory itself. A NULL key is
+ * accepted and ignored.
+ */
+void ieee80211_debugfs_key_remove(struct ieee80211_key *key)
+{
+	if (key == NULL)
+		return;
+
+	/* files first ... */
+	DEBUGFS_DEL(keylen);
+	DEBUGFS_DEL(force_sw_encrypt);
+	DEBUGFS_DEL(keyidx);
+	DEBUGFS_DEL(hw_key_idx);
+	DEBUGFS_DEL(tx_rx_count);
+	DEBUGFS_DEL(algorithm);
+	DEBUGFS_DEL(tx_spec);
+	DEBUGFS_DEL(rx_spec);
+	DEBUGFS_DEL(replays);
+	DEBUGFS_DEL(key);
+
+	/* ... then the symlink and the directory that contained them */
+	debugfs_remove(key->debugfs.stalink);
+	key->debugfs.stalink = NULL;
+
+	debugfs_remove(key->debugfs.dir);
+	key->debugfs.dir = NULL;
+}
+/*
+ * Create the "default_key" symlink in the interface's debugfs directory,
+ * pointing at "../keys/<keyidx>" of the current default key. Does nothing
+ * when the interface has no debugfs directory. sdata->default_key is
+ * dereferenced without a check, so it must be set by the caller.
+ */
+void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata)
+{
+	char target[50];
+
+	if (sdata->debugfsdir == NULL)
+		return;
+
+	sprintf(target, "../keys/%d", sdata->default_key->keyidx);
+	sdata->debugfs.default_key = debugfs_create_symlink("default_key",
+							    sdata->debugfsdir,
+							    target);
+}
+/*
+ * Remove the "default_key" symlink of an interface and clear the stored
+ * dentry. A NULL sdata is accepted and ignored.
+ */
+void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata)
+{
+	if (sdata != NULL) {
+		debugfs_remove(sdata->debugfs.default_key);
+		sdata->debugfs.default_key = NULL;
+	}
+}
+/*
+ * Create the "station" symlink inside a key's debugfs directory, pointing
+ * at the debugfs directory of the station that owns the key. Does nothing
+ * when the key has no debugfs directory.
+ *
+ * The key directory is <phy>/keys/<keyidx>/ and the station directories
+ * live under <phy>/stations/, so the relative target has to climb two
+ * levels and use the "stations" name. The previous "../sta/" target
+ * resolved to <phy>/keys/sta/..., which is never created.
+ * NOTE(review): assumes each station directory is named by the station's
+ * MAC address in MAC_FMT form - confirm against debugfs_sta.c.
+ */
+void ieee80211_debugfs_key_sta_link(struct ieee80211_key *key,
+				    struct sta_info *sta)
+{
+	char buf[50];
+
+	if (!key->debugfs.dir)
+		return;
+
+	sprintf(buf, "../../stations/" MAC_FMT, MAC_ARG(sta->addr));
+	key->debugfs.stalink =
+		debugfs_create_symlink("station", key->debugfs.dir, buf);
+}
+
+/*
+ * Remove the "station" symlink of a key and clear the stored dentry.
+ * The sta argument is not used.
+ */
+void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
+				   struct sta_info *sta)
+{
+	struct dentry *link = key->debugfs.stalink;
+
+	debugfs_remove(link);
+	key->debugfs.stalink = NULL;
+}
diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h
new file mode 100644
index 000000000000..aecfce395da6
--- /dev/null
+++ b/net/mac80211/debugfs_key.h
@@ -0,0 +1,34 @@
+#ifndef __MAC80211_DEBUGFS_KEY_H
+#define __MAC80211_DEBUGFS_KEY_H
+
+#ifdef CONFIG_MAC80211_DEBUGFS /* debugfs configured: per-key debugfs helpers are declared here */
+void ieee80211_debugfs_key_add(struct ieee80211_local *local,
+ struct ieee80211_key *key);
+void ieee80211_debugfs_key_remove(struct ieee80211_key *key);
+void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata);
+void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata);
+void ieee80211_debugfs_key_sta_link(struct ieee80211_key *key,
+ struct sta_info *sta);
+void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
+ struct sta_info *sta);
+#else /* not configured: every helper becomes an empty inline stub */
+static inline void ieee80211_debugfs_key_add(struct ieee80211_local *local,
+ struct ieee80211_key *key)
+{}
+static inline void ieee80211_debugfs_key_remove(struct ieee80211_key *key)
+{}
+static inline void ieee80211_debugfs_key_add_default(
+ struct ieee80211_sub_if_data *sdata)
+{}
+static inline void ieee80211_debugfs_key_remove_default(
+ struct ieee80211_sub_if_data *sdata)
+{}
+static inline void ieee80211_debugfs_key_sta_link(
+ struct ieee80211_key *key, struct sta_info *sta)
+{}
+static inline void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
+ struct sta_info *sta)
+{}
+#endif
+
+#endif /* __MAC80211_DEBUGFS_KEY_H */
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
new file mode 100644
index 000000000000..9e3964638bad
--- /dev/null
+++ b/net/mac80211/debugfs_netdev.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/if.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <net/mac80211.h>
+#include <net/cfg80211.h>
+#include "ieee80211_i.h"
+#include "ieee80211_rate.h"
+#include "debugfs.h"
+#include "debugfs_netdev.h"
+
+/*
+ * Common read path for the per-interface debugfs files.
+ *
+ * While holding dev_base_lock for reading, checks that the net device is
+ * still registered and lets @format render the attribute into a local
+ * buffer. The text is copied to user space only after the lock has been
+ * dropped: the user copy may fault and sleep, which is not allowed while
+ * the lock is held.
+ *
+ * Returns the simple_read_from_buffer() result, or -EINVAL when the device
+ * is not in the NETREG_REGISTERED state.
+ */
+static ssize_t ieee80211_if_read(
+	struct ieee80211_sub_if_data *sdata,
+	char __user *userbuf,
+	size_t count, loff_t *ppos,
+	ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int))
+{
+	char buf[70];
+	ssize_t ret = -EINVAL;
+
+	read_lock(&dev_base_lock);
+	if (sdata->dev->reg_state == NETREG_REGISTERED)
+		ret = (*format)(sdata, buf, sizeof(buf));
+	read_unlock(&dev_base_lock);
+
+	/* buf is a private copy, so it is safe to use without the lock */
+	if (ret >= 0)
+		ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret);
+
+	return ret;
+}
+/*
+ * Formatter generators. Each defines ieee80211_if_fmt_<name>(), which
+ * renders sdata-><field> into buf with scnprintf() and returns the length:
+ *   IEEE80211_IF_FMT        - caller-supplied format string
+ *   IEEE80211_IF_FMT_DEC    - "%d\n"
+ *   IEEE80211_IF_FMT_HEX    - "%#x\n"
+ *   IEEE80211_IF_FMT_SIZE   - "%zd\n"
+ *   IEEE80211_IF_FMT_ATOMIC - "%d\n" of atomic_read(&sdata-><field>)
+ *   IEEE80211_IF_FMT_MAC    - MAC_FMT "\n" of MAC_ARG(sdata-><field>)
+ */
+#define IEEE80211_IF_FMT(name, field, format_string) \
+static ssize_t ieee80211_if_fmt_##name( \
+ const struct ieee80211_sub_if_data *sdata, char *buf, \
+ int buflen) \
+{ \
+ return scnprintf(buf, buflen, format_string, sdata->field); \
+}
+#define IEEE80211_IF_FMT_DEC(name, field) \
+ IEEE80211_IF_FMT(name, field, "%d\n")
+#define IEEE80211_IF_FMT_HEX(name, field) \
+ IEEE80211_IF_FMT(name, field, "%#x\n")
+#define IEEE80211_IF_FMT_SIZE(name, field) \
+ IEEE80211_IF_FMT(name, field, "%zd\n")
+
+#define IEEE80211_IF_FMT_ATOMIC(name, field) \
+static ssize_t ieee80211_if_fmt_##name( \
+ const struct ieee80211_sub_if_data *sdata, \
+ char *buf, int buflen) \
+{ \
+ return scnprintf(buf, buflen, "%d\n", atomic_read(&sdata->field));\
+}
+
+#define IEEE80211_IF_FMT_MAC(name, field) \
+static ssize_t ieee80211_if_fmt_##name( \
+ const struct ieee80211_sub_if_data *sdata, char *buf, \
+ int buflen) \
+{ \
+ return scnprintf(buf, buflen, MAC_FMT "\n", MAC_ARG(sdata->field));\
+}
+/*
+ * __IEEE80211_IF_FILE(name) defines ieee80211_if_read_<name>(), which calls
+ * ieee80211_if_read() with ieee80211_if_fmt_<name> as the formatter, plus
+ * the file_operations table <name>_ops (no trailing semicolon; the use site
+ * supplies it). IEEE80211_IF_FILE(name, field, format) additionally
+ * generates the formatter through IEEE80211_IF_FMT_<format>.
+ */
+#define __IEEE80211_IF_FILE(name) \
+static ssize_t ieee80211_if_read_##name(struct file *file, \
+ char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ return ieee80211_if_read(file->private_data, \
+ userbuf, count, ppos, \
+ ieee80211_if_fmt_##name); \
+} \
+static const struct file_operations name##_ops = { \
+ .read = ieee80211_if_read_##name, \
+ .open = mac80211_open_file_generic, \
+}
+
+#define IEEE80211_IF_FILE(name, field, format) \
+ IEEE80211_IF_FMT_##format(name, field) \
+ __IEEE80211_IF_FILE(name)
+
+/* common attributes (added for STA/IBSS, AP, WDS and VLAN interfaces, not for monitor) */
+IEEE80211_IF_FILE(channel_use, channel_use, DEC);
+IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC);
+IEEE80211_IF_FILE(eapol, eapol, DEC);
+IEEE80211_IF_FILE(ieee8021_x, ieee802_1x, DEC); /* name "ieee8021_x" differs from the field ieee802_1x */
+
+/* STA/IBSS attributes (all read from sdata->u.sta) */
+IEEE80211_IF_FILE(state, u.sta.state, DEC);
+IEEE80211_IF_FILE(bssid, u.sta.bssid, MAC);
+IEEE80211_IF_FILE(prev_bssid, u.sta.prev_bssid, MAC);
+IEEE80211_IF_FILE(ssid_len, u.sta.ssid_len, SIZE);
+IEEE80211_IF_FILE(aid, u.sta.aid, DEC);
+IEEE80211_IF_FILE(ap_capab, u.sta.ap_capab, HEX);
+IEEE80211_IF_FILE(capab, u.sta.capab, HEX);
+IEEE80211_IF_FILE(extra_ie_len, u.sta.extra_ie_len, SIZE);
+IEEE80211_IF_FILE(auth_tries, u.sta.auth_tries, DEC);
+IEEE80211_IF_FILE(assoc_tries, u.sta.assoc_tries, DEC);
+IEEE80211_IF_FILE(auth_algs, u.sta.auth_algs, HEX);
+IEEE80211_IF_FILE(auth_alg, u.sta.auth_alg, DEC);
+IEEE80211_IF_FILE(auth_transaction, u.sta.auth_transaction, DEC);
+
+/*
+ * Formatter for the "flags" file: prints one line for every STA flag that
+ * is set in sdata->u.sta, followed by a terminating empty line.
+ */
+static ssize_t ieee80211_if_fmt_flags(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	const char *ssid = sdata->u.sta.ssid_set ? "SSID\n" : "";
+	const char *bssid = sdata->u.sta.bssid_set ? "BSSID\n" : "";
+	const char *prev = sdata->u.sta.prev_bssid_set ? "prev BSSID\n" : "";
+	const char *auth = sdata->u.sta.authenticated ? "AUTH\n" : "";
+	const char *assoc = sdata->u.sta.associated ? "ASSOC\n" : "";
+	const char *poll = sdata->u.sta.probereq_poll ? "PROBEREQ POLL\n" : "";
+	const char *prot = sdata->u.sta.use_protection ? "CTS prot\n" : "";
+
+	return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n",
+			 ssid, bssid, prev, auth, assoc, poll, prot);
+}
+__IEEE80211_IF_FILE(flags);
+
+/* AP attributes (all read from sdata->u.ap; registered by add_ap_files) */
+IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
+IEEE80211_IF_FILE(dtim_period, u.ap.dtim_period, DEC);
+IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
+IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC);
+IEEE80211_IF_FILE(force_unicast_rateidx, u.ap.force_unicast_rateidx, DEC);
+IEEE80211_IF_FILE(max_ratectrl_rateidx, u.ap.max_ratectrl_rateidx, DEC);
+
+/*
+ * Formatter for "num_buffered_multicast": prints the current length of the
+ * sdata->u.ap.ps_bc_buf queue.
+ */
+static ssize_t ieee80211_if_fmt_num_buffered_multicast(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	unsigned int queued = skb_queue_len(&sdata->u.ap.ps_bc_buf);
+
+	return scnprintf(buf, buflen, "%u\n", queued);
+}
+__IEEE80211_IF_FILE(num_buffered_multicast);
+
+/*
+ * Formatter for "beacon_head_len": prints sdata->u.ap.beacon_head_len when
+ * a beacon head is set, otherwise just an empty line.
+ */
+static ssize_t ieee80211_if_fmt_beacon_head_len(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	if (!sdata->u.ap.beacon_head)
+		return scnprintf(buf, buflen, "\n");
+
+	return scnprintf(buf, buflen, "%d\n", sdata->u.ap.beacon_head_len);
+}
+__IEEE80211_IF_FILE(beacon_head_len);
+
+/*
+ * Formatter for "beacon_tail_len": prints sdata->u.ap.beacon_tail_len when
+ * a beacon tail is set, otherwise just an empty line.
+ */
+static ssize_t ieee80211_if_fmt_beacon_tail_len(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	if (!sdata->u.ap.beacon_tail)
+		return scnprintf(buf, buflen, "\n");
+
+	return scnprintf(buf, buflen, "%d\n", sdata->u.ap.beacon_tail_len);
+}
+__IEEE80211_IF_FILE(beacon_tail_len);
+
+/* WDS attributes: "peer" prints sdata->u.wds.remote_addr as a MAC address */
+IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC);
+
+/* VLAN attributes: "vlan_id" prints sdata->u.vlan.id in decimal */
+IEEE80211_IF_FILE(vlan_id, u.vlan.id, DEC);
+
+/* MONITOR attributes */
+
+/*
+ * Formatter for the monitor "mode" file: prints "hard" when the hardware
+ * sets IEEE80211_HW_MONITOR_DURING_OPER or when local->open_count equals
+ * local->monitors, and "soft" otherwise.
+ */
+static ssize_t ieee80211_if_fmt_mode(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	const struct ieee80211_local *local = sdata->local;
+	const char *kind = "soft";
+
+	if ((local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) ||
+	    local->open_count == local->monitors)
+		kind = "hard";
+
+	return scnprintf(buf, buflen, "%s\n", kind);
+}
+__IEEE80211_IF_FILE(mode);
+
+/*
+ * DEBUGFS_ADD(name, type): create file "<name>" (mode 0444) in
+ * sdata->debugfsdir, passing sdata as the data pointer and <name>_ops as
+ * the operations, and store the dentry in sdata->debugfs.<type>.<name>.
+ * Expects a variable called "sdata" in scope.
+ */
+#define DEBUGFS_ADD(name, type)\
+ sdata->debugfs.type.name = debugfs_create_file(#name, 0444,\
+ sdata->debugfsdir, sdata, &name##_ops);
+/*
+ * Create the debugfs files used for STA and IBSS interfaces (see
+ * add_files()); dentries are kept in sdata->debugfs.sta.
+ */
+static void add_sta_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_ADD(channel_use, sta);
+ DEBUGFS_ADD(drop_unencrypted, sta);
+ DEBUGFS_ADD(eapol, sta);
+ DEBUGFS_ADD(ieee8021_x, sta);
+ DEBUGFS_ADD(state, sta);
+ DEBUGFS_ADD(bssid, sta);
+ DEBUGFS_ADD(prev_bssid, sta);
+ DEBUGFS_ADD(ssid_len, sta);
+ DEBUGFS_ADD(aid, sta);
+ DEBUGFS_ADD(ap_capab, sta);
+ DEBUGFS_ADD(capab, sta);
+ DEBUGFS_ADD(extra_ie_len, sta);
+ DEBUGFS_ADD(auth_tries, sta);
+ DEBUGFS_ADD(assoc_tries, sta);
+ DEBUGFS_ADD(auth_algs, sta);
+ DEBUGFS_ADD(auth_alg, sta);
+ DEBUGFS_ADD(auth_transaction, sta);
+ DEBUGFS_ADD(flags, sta);
+}
+/*
+ * Create the debugfs files used for AP interfaces; dentries are kept in
+ * sdata->debugfs.ap.
+ */
+static void add_ap_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_ADD(channel_use, ap);
+ DEBUGFS_ADD(drop_unencrypted, ap);
+ DEBUGFS_ADD(eapol, ap);
+ DEBUGFS_ADD(ieee8021_x, ap);
+ DEBUGFS_ADD(num_sta_ps, ap);
+ DEBUGFS_ADD(dtim_period, ap);
+ DEBUGFS_ADD(dtim_count, ap);
+ DEBUGFS_ADD(num_beacons, ap);
+ DEBUGFS_ADD(force_unicast_rateidx, ap);
+ DEBUGFS_ADD(max_ratectrl_rateidx, ap);
+ DEBUGFS_ADD(num_buffered_multicast, ap);
+ DEBUGFS_ADD(beacon_head_len, ap);
+ DEBUGFS_ADD(beacon_tail_len, ap);
+}
+/*
+ * Create the debugfs files used for WDS interfaces; dentries are kept in
+ * sdata->debugfs.wds.
+ */
+static void add_wds_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_ADD(channel_use, wds);
+ DEBUGFS_ADD(drop_unencrypted, wds);
+ DEBUGFS_ADD(eapol, wds);
+ DEBUGFS_ADD(ieee8021_x, wds);
+ DEBUGFS_ADD(peer, wds);
+}
+/*
+ * Create the debugfs files used for VLAN interfaces; dentries are kept in
+ * sdata->debugfs.vlan.
+ */
+static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_ADD(channel_use, vlan);
+ DEBUGFS_ADD(drop_unencrypted, vlan);
+ DEBUGFS_ADD(eapol, vlan);
+ DEBUGFS_ADD(ieee8021_x, vlan);
+ DEBUGFS_ADD(vlan_id, vlan);
+}
+/*
+ * Create the single debugfs file ("mode") used for monitor interfaces.
+ */
+static void add_monitor_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_ADD(mode, monitor);
+}
+
+/*
+ * Create the set of debugfs files that matches the interface's current
+ * type. Nothing is created when the interface has no debugfs directory or
+ * when the type is not one of those handled below.
+ */
+static void add_files(struct ieee80211_sub_if_data *sdata)
+{
+	if (!sdata->debugfsdir)
+		return;
+
+	if (sdata->type == IEEE80211_IF_TYPE_STA ||
+	    sdata->type == IEEE80211_IF_TYPE_IBSS)
+		add_sta_files(sdata);
+	else if (sdata->type == IEEE80211_IF_TYPE_AP)
+		add_ap_files(sdata);
+	else if (sdata->type == IEEE80211_IF_TYPE_WDS)
+		add_wds_files(sdata);
+	else if (sdata->type == IEEE80211_IF_TYPE_MNTR)
+		add_monitor_files(sdata);
+	else if (sdata->type == IEEE80211_IF_TYPE_VLAN)
+		add_vlan_files(sdata);
+}
+/*
+ * DEBUGFS_DEL(name, type): remove sdata->debugfs.<type>.<name> and clear
+ * the pointer. Expands to two statements, so it must not be used as the
+ * body of an unbraced if/else or loop.
+ */
+#define DEBUGFS_DEL(name, type)\
+ debugfs_remove(sdata->debugfs.type.name);\
+ sdata->debugfs.type.name = NULL;
+/*
+ * Remove the files created by add_sta_files() and clear their dentries.
+ */
+static void del_sta_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_DEL(channel_use, sta);
+ DEBUGFS_DEL(drop_unencrypted, sta);
+ DEBUGFS_DEL(eapol, sta);
+ DEBUGFS_DEL(ieee8021_x, sta);
+ DEBUGFS_DEL(state, sta);
+ DEBUGFS_DEL(bssid, sta);
+ DEBUGFS_DEL(prev_bssid, sta);
+ DEBUGFS_DEL(ssid_len, sta);
+ DEBUGFS_DEL(aid, sta);
+ DEBUGFS_DEL(ap_capab, sta);
+ DEBUGFS_DEL(capab, sta);
+ DEBUGFS_DEL(extra_ie_len, sta);
+ DEBUGFS_DEL(auth_tries, sta);
+ DEBUGFS_DEL(assoc_tries, sta);
+ DEBUGFS_DEL(auth_algs, sta);
+ DEBUGFS_DEL(auth_alg, sta);
+ DEBUGFS_DEL(auth_transaction, sta);
+ DEBUGFS_DEL(flags, sta);
+}
+/*
+ * Remove the files created by add_ap_files() and clear their dentries.
+ */
+static void del_ap_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_DEL(channel_use, ap);
+ DEBUGFS_DEL(drop_unencrypted, ap);
+ DEBUGFS_DEL(eapol, ap);
+ DEBUGFS_DEL(ieee8021_x, ap);
+ DEBUGFS_DEL(num_sta_ps, ap);
+ DEBUGFS_DEL(dtim_period, ap);
+ DEBUGFS_DEL(dtim_count, ap);
+ DEBUGFS_DEL(num_beacons, ap);
+ DEBUGFS_DEL(force_unicast_rateidx, ap);
+ DEBUGFS_DEL(max_ratectrl_rateidx, ap);
+ DEBUGFS_DEL(num_buffered_multicast, ap);
+ DEBUGFS_DEL(beacon_head_len, ap);
+ DEBUGFS_DEL(beacon_tail_len, ap);
+}
+/*
+ * Remove the files created by add_wds_files() and clear their dentries.
+ */
+static void del_wds_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_DEL(channel_use, wds);
+ DEBUGFS_DEL(drop_unencrypted, wds);
+ DEBUGFS_DEL(eapol, wds);
+ DEBUGFS_DEL(ieee8021_x, wds);
+ DEBUGFS_DEL(peer, wds);
+}
+/*
+ * Remove the files created by add_vlan_files() and clear their dentries.
+ */
+static void del_vlan_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_DEL(channel_use, vlan);
+ DEBUGFS_DEL(drop_unencrypted, vlan);
+ DEBUGFS_DEL(eapol, vlan);
+ DEBUGFS_DEL(ieee8021_x, vlan);
+ DEBUGFS_DEL(vlan_id, vlan);
+}
+/*
+ * Remove the file created by add_monitor_files() and clear its dentry.
+ */
+static void del_monitor_files(struct ieee80211_sub_if_data *sdata)
+{
+ DEBUGFS_DEL(mode, monitor);
+}
+
+/*
+ * Remove the set of debugfs files that belongs to interface type @type.
+ * The type is passed in explicitly (rather than read from sdata) so the
+ * files of a previous type can be removed after the interface changed
+ * type. Does nothing when the interface has no debugfs directory or the
+ * type is not one of those handled below.
+ */
+static void del_files(struct ieee80211_sub_if_data *sdata, int type)
+{
+	if (!sdata->debugfsdir)
+		return;
+
+	if (type == IEEE80211_IF_TYPE_STA || type == IEEE80211_IF_TYPE_IBSS)
+		del_sta_files(sdata);
+	else if (type == IEEE80211_IF_TYPE_AP)
+		del_ap_files(sdata);
+	else if (type == IEEE80211_IF_TYPE_WDS)
+		del_wds_files(sdata);
+	else if (type == IEEE80211_IF_TYPE_MNTR)
+		del_monitor_files(sdata);
+	else if (type == IEEE80211_IF_TYPE_VLAN)
+		del_vlan_files(sdata);
+}
+
+/* Non-zero while the netdevice notifier below is installed. */
+static int notif_registered;
+
+/*
+ * Create the "netdev:<ifname>" debugfs directory for an interface inside
+ * the wiphy's debugfs directory and remember it in sdata->debugfsdir.
+ * Skipped entirely when the netdevice notifier is not registered. The
+ * per-type files are not created here.
+ */
+void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
+{
+	struct dentry *parent;
+	char dirname[10+IFNAMSIZ];
+
+	if (notif_registered == 0)
+		return;
+
+	parent = sdata->local->hw.wiphy->debugfsdir;
+	sprintf(dirname, "netdev:%s", sdata->dev->name);
+	sdata->debugfsdir = debugfs_create_dir(dirname, parent);
+}
+/*
+ * Remove the per-type files of an interface (based on its current type),
+ * then its debugfs directory, and clear sdata->debugfsdir.
+ */
+void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
+{
+ del_files(sdata, sdata->type);
+ debugfs_remove(sdata->debugfsdir);
+ sdata->debugfsdir = NULL;
+}
+/*
+ * Swap the per-type debugfs files after an interface type change: remove
+ * the files of @oldtype, then add those matching the current sdata->type.
+ */
+void ieee80211_debugfs_change_if_type(struct ieee80211_sub_if_data *sdata,
+ int oldtype)
+{
+ del_files(sdata, oldtype);
+ add_files(sdata);
+}
+/*
+ * Netdevice notifier callback. Only NETDEV_CHANGENAME events for devices
+ * that have an ieee80211_ptr with a wiphy whose privid equals
+ * mac80211_wiphy_privid pass the checks. Renaming the debugfs directory is
+ * still a TODO, so every path currently returns 0 without doing anything.
+ */
+static int netdev_notify(struct notifier_block * nb,
+ unsigned long state,
+ void *ndev)
+{
+ struct net_device *dev = ndev;
+ char buf[10+IFNAMSIZ]; /* only referenced by the commented-out TODO code below */
+
+ if (state != NETDEV_CHANGENAME)
+ return 0;
+
+ if (!dev->ieee80211_ptr || !dev->ieee80211_ptr->wiphy)
+ return 0;
+
+ if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid)
+ return 0;
+
+ /* TODO
+ sprintf(buf, "netdev:%s", dev->name);
+ debugfs_rename(IEEE80211_DEV_TO_SUB_IF(dev)->debugfsdir, buf);
+ */
+
+ return 0;
+}
+
+static struct notifier_block mac80211_debugfs_netdev_notifier = {
+ .notifier_call = netdev_notify,
+};
+
+/*
+ * Install the netdevice notifier. On success notif_registered is set, which
+ * enables ieee80211_debugfs_add_netdev(); on failure an error is logged and
+ * per-netdev debugfs stays disabled.
+ */
+void ieee80211_debugfs_netdev_init(void)
+{
+	if (register_netdevice_notifier(&mac80211_debugfs_netdev_notifier)) {
+		printk(KERN_ERR
+		       "mac80211: failed to install netdev notifier,"
+		       " disabling per-netdev debugfs!\n");
+		return;
+	}
+
+	notif_registered = 1;
+}
+/*
+ * Unregister the netdevice notifier and clear notif_registered.
+ * NOTE(review): unregisters even when registration failed in
+ * ieee80211_debugfs_netdev_init() - confirm that is harmless.
+ */
+void ieee80211_debugfs_netdev_exit(void)
+{
+ unregister_netdevice_notifier(&mac80211_debugfs_netdev_notifier);
+ notif_registered = 0;
+}
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
new file mode 100644
index 000000000000..a690071fde8a
--- /dev/null
+++ b/net/mac80211/debugfs_netdev.h
@@ -0,0 +1,30 @@
+/* routines exported for per-netdev debugfs handling */
+
+#ifndef __IEEE80211_DEBUGFS_NETDEV_H
+#define __IEEE80211_DEBUGFS_NETDEV_H
+
+#ifdef CONFIG_MAC80211_DEBUGFS /* debugfs configured: per-netdev debugfs helpers are declared here */
+void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata);
+void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
+void ieee80211_debugfs_change_if_type(struct ieee80211_sub_if_data *sdata,
+ int oldtype);
+void ieee80211_debugfs_netdev_init(void);
+void ieee80211_debugfs_netdev_exit(void);
+#else /* not configured: every helper becomes an empty inline stub */
+static inline void ieee80211_debugfs_add_netdev(
+ struct ieee80211_sub_if_data *sdata)
+{}
+static inline void ieee80211_debugfs_remove_netdev(
+ struct ieee80211_sub_if_data *sdata)
+{}
+static inline void ieee80211_debugfs_change_if_type(
+ struct ieee80211_sub_if_data *sdata, int oldtype)
+{}
+static inline void ieee80211_debugfs_netdev_init(void)
+{}
+
+static inline void ieee80211_debugfs_netdev_exit(void)
+{}
+#endif
+
+#endif /* __IEEE80211_DEBUGFS_NETDEV_H */
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
new file mode 100644
index 000000000000..d41e696f3980
--- /dev/null
+++ b/net/mac80211/debugfs_sta.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2003-2005 Devicescape Software, Inc.
+ * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/ieee80211.h>
+#include "ieee80211_i.h"
+#include "debugfs.h"
+#include "debugfs_sta.h"
+#include "sta_info.h"
+
+/* sta attributes */
+/*
+ * STA_READ() generates a debugfs read handler named sta_<name>_read().
+ * The handler takes the sta_info from file->private_data, formats
+ * sta-><field> with format_string into a buflen-byte stack buffer and
+ * copies the result to user space with simple_read_from_buffer().
+ *
+ * The STA_READ_D/U/LU/S variants fix the buffer at 20 bytes and select the
+ * "%d", "%u", "%lu" or "%s" format (each followed by a newline).
+ */
+#define STA_READ(name, buflen, field, format_string) \
+static ssize_t sta_ ##name## _read(struct file *file, \
+ char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ int res; \
+ struct sta_info *sta = file->private_data; \
+ char buf[buflen]; \
+ res = scnprintf(buf, buflen, format_string, sta->field); \
+ return simple_read_from_buffer(userbuf, count, ppos, buf, res); \
+}
+#define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n")
+#define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n")
+#define STA_READ_LU(name, field) STA_READ(name, 20, field, "%lu\n")
+#define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n")
+/*
+ * STA_READ_RATE() generates sta_<name>_read(), which treats sta-><field>
+ * as an index into the rate table of local->oper_hw_mode and prints that
+ * entry's rate, or -1 when the index lies outside 0..num_rates-1.
+ */
+#define STA_READ_RATE(name, field) \
+static ssize_t sta_##name##_read(struct file *file, \
+ char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ struct sta_info *sta = file->private_data; \
+ struct ieee80211_local *local = wdev_priv(sta->dev->ieee80211_ptr);\
+ struct ieee80211_hw_mode *mode = local->oper_hw_mode; \
+ char buf[20]; \
+ int res = scnprintf(buf, sizeof(buf), "%d\n", \
+ (sta->field >= 0 && \
+ sta->field < mode->num_rates) ? \
+ mode->rates[sta->field].rate : -1); \
+ return simple_read_from_buffer(userbuf, count, ppos, buf, res); \
+}
+/*
+ * STA_OPS() defines the file_operations table sta_<name>_ops around an
+ * already defined sta_<name>_read() handler.  STA_FILE() emits a
+ * STA_READ_<format>() handler followed by its STA_OPS() table; the closing
+ * ';' is supplied at the point of use.
+ */
+#define STA_OPS(name) \
+static const struct file_operations sta_ ##name## _ops = { \
+ .read = sta_##name##_read, \
+ .open = mac80211_open_file_generic, \
+}
+
+#define STA_FILE(name, field, format) \
+ STA_READ_##format(name, field) \
+ STA_OPS(name)
+
+STA_FILE(aid, aid, D);
+STA_FILE(key_idx_compression, key_idx_compression, D);
+STA_FILE(dev, dev->name, S);
+STA_FILE(vlan_id, vlan_id, D);
+STA_FILE(rx_packets, rx_packets, LU);
+STA_FILE(tx_packets, tx_packets, LU);
+STA_FILE(rx_bytes, rx_bytes, LU);
+STA_FILE(tx_bytes, tx_bytes, LU);
+STA_FILE(rx_duplicates, num_duplicates, LU);
+STA_FILE(rx_fragments, rx_fragments, LU);
+STA_FILE(rx_dropped, rx_dropped, LU);
+STA_FILE(tx_fragments, tx_fragments, LU);
+STA_FILE(tx_filtered, tx_filtered_count, LU);
+STA_FILE(txrate, txrate, RATE);
+STA_FILE(last_txrate, last_txrate, RATE);
+STA_FILE(tx_retry_failed, tx_retry_failed, LU);
+STA_FILE(tx_retry_count, tx_retry_count, LU);
+STA_FILE(last_rssi, last_rssi, D);
+STA_FILE(last_signal, last_signal, D);
+STA_FILE(last_noise, last_noise, D);
+STA_FILE(channel_use, channel_use, D);
+STA_FILE(wep_weak_iv_count, wep_weak_iv_count, D);
+
+/*
+ * debugfs read handler: prints one line for every station flag that is
+ * set in sta->flags.
+ */
+static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
+			      size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	char buf[100];
+	int len;
+
+	len = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s",
+			(sta->flags & WLAN_STA_AUTH) ? "AUTH\n" : "",
+			(sta->flags & WLAN_STA_ASSOC) ? "ASSOC\n" : "",
+			(sta->flags & WLAN_STA_PS) ? "PS\n" : "",
+			(sta->flags & WLAN_STA_TIM) ? "TIM\n" : "",
+			(sta->flags & WLAN_STA_PERM) ? "PERM\n" : "",
+			(sta->flags & WLAN_STA_AUTHORIZED) ?
+				"AUTHORIZED\n" : "",
+			(sta->flags & WLAN_STA_SHORT_PREAMBLE) ?
+				"SHORT PREAMBLE\n" : "",
+			(sta->flags & WLAN_STA_WME) ? "WME\n" : "",
+			(sta->flags & WLAN_STA_WDS) ? "WDS\n" : "");
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+STA_OPS(flags);
+
+/* debugfs read handler: number of frames queued in sta->ps_tx_buf */
+static ssize_t sta_num_ps_buf_frames_read(struct file *file,
+					  char __user *userbuf,
+					  size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	char buf[20];
+	int len;
+
+	len = scnprintf(buf, sizeof(buf), "%u\n",
+			skb_queue_len(&sta->ps_tx_buf));
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+STA_OPS(num_ps_buf_frames);
+
+/* debugfs read handler: the three entries of sta->last_ack_rssi[] */
+static ssize_t sta_last_ack_rssi_read(struct file *file, char __user *userbuf,
+				      size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	char buf[100];
+	int len;
+
+	len = scnprintf(buf, sizeof(buf), "%d %d %d\n",
+			sta->last_ack_rssi[0],
+			sta->last_ack_rssi[1],
+			sta->last_ack_rssi[2]);
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+STA_OPS(last_ack_rssi);
+
+/*
+ * debugfs read handler: milliseconds elapsed since sta->last_ack, or -1
+ * when sta->last_ack is zero.
+ */
+static ssize_t sta_last_ack_ms_read(struct file *file, char __user *userbuf,
+				    size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	char buf[20];
+	int len;
+
+	len = scnprintf(buf, sizeof(buf), "%d\n",
+			sta->last_ack ?
+			jiffies_to_msecs(jiffies - sta->last_ack) : -1);
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+STA_OPS(last_ack_ms);
+
+/*
+ * debugfs read handler: milliseconds elapsed since sta->last_rx.
+ *
+ * jiffies_to_msecs() returns an unsigned value, so it is printed with %u;
+ * with %d a long idle period would show up as a negative number.
+ */
+static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf,
+				    size_t count, loff_t *ppos)
+{
+	char buf[20];
+	struct sta_info *sta = file->private_data;
+	int res = scnprintf(buf, sizeof(buf), "%u\n",
+			    jiffies_to_msecs(jiffies - sta->last_rx));
+	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
+}
+STA_OPS(inactive_ms);
+
+/*
+ * debugfs read handler: sta->last_seq_ctrl[] for every RX data queue, in
+ * hex, space separated, on a single line.
+ */
+static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
+				      size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	char buf[15*NUM_RX_DATA_QUEUES];
+	char *pos = buf;
+	char *const end = buf + sizeof(buf);
+	int q;
+
+	for (q = 0; q < NUM_RX_DATA_QUEUES; q++)
+		pos += scnprintf(pos, end - pos, "%x ",
+				 sta->last_seq_ctrl[q]);
+	pos += scnprintf(pos, end - pos, "\n");
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, pos - buf);
+}
+STA_OPS(last_seq_ctrl);
+
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+/* debugfs read handler: sta->wme_rx_queue[] counters, one per RX queue */
+static ssize_t sta_wme_rx_queue_read(struct file *file, char __user *userbuf,
+				     size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	char buf[15*NUM_RX_DATA_QUEUES];
+	char *pos = buf;
+	char *const end = buf + sizeof(buf);
+	int q;
+
+	for (q = 0; q < NUM_RX_DATA_QUEUES; q++)
+		pos += scnprintf(pos, end - pos, "%u ",
+				 sta->wme_rx_queue[q]);
+	pos += scnprintf(pos, end - pos, "\n");
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, pos - buf);
+}
+STA_OPS(wme_rx_queue);
+
+/* debugfs read handler: sta->wme_tx_queue[] counters, one per TX queue */
+static ssize_t sta_wme_tx_queue_read(struct file *file, char __user *userbuf,
+				     size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	char buf[15*NUM_TX_DATA_QUEUES];
+	char *pos = buf;
+	char *const end = buf + sizeof(buf);
+	int q;
+
+	for (q = 0; q < NUM_TX_DATA_QUEUES; q++)
+		pos += scnprintf(pos, end - pos, "%u ",
+				 sta->wme_tx_queue[q]);
+	pos += scnprintf(pos, end - pos, "\n");
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, pos - buf);
+}
+STA_OPS(wme_tx_queue);
+#endif
+
+#define DEBUGFS_ADD(name) \
+ sta->debugfs.name = debugfs_create_file(#name, 0444, \
+ sta->debugfs.dir, sta, &sta_ ##name## _ops);
+
+#define DEBUGFS_DEL(name) \
+ debugfs_remove(sta->debugfs.name);\
+ sta->debugfs.name = NULL;
+
+
+/*
+ * Create the debugfs directory for a station, named after its MAC address,
+ * below local->debugfs.stations, and populate it with the per-station
+ * files.  Does nothing when the stations directory does not exist or the
+ * per-station directory cannot be created.
+ */
+void ieee80211_sta_debugfs_add(struct sta_info *sta)
+{
+	/* "xx:xx:xx:xx:xx:xx" is 17 characters plus the terminating NUL */
+	char buf[3*6];
+	struct dentry *stations_dir = sta->local->debugfs.stations;
+
+	if (!stations_dir)
+		return;
+
+	/* bounded write: the buffer has no slack beyond the formatted MAC */
+	scnprintf(buf, sizeof(buf), MAC_FMT, MAC_ARG(sta->addr));
+
+	sta->debugfs.dir = debugfs_create_dir(buf, stations_dir);
+	if (!sta->debugfs.dir)
+		return;
+
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(num_ps_buf_frames);
+	DEBUGFS_ADD(last_ack_rssi);
+	DEBUGFS_ADD(last_ack_ms);
+	DEBUGFS_ADD(inactive_ms);
+	DEBUGFS_ADD(last_seq_ctrl);
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+	DEBUGFS_ADD(wme_rx_queue);
+	DEBUGFS_ADD(wme_tx_queue);
+#endif
+}
+/*
+ * Remove the per-station debugfs files created by
+ * ieee80211_sta_debugfs_add(), then the station's directory itself, and
+ * reset every stored dentry pointer to NULL.
+ */
+void ieee80211_sta_debugfs_remove(struct sta_info *sta)
+{
+ DEBUGFS_DEL(flags);
+ DEBUGFS_DEL(num_ps_buf_frames);
+ DEBUGFS_DEL(last_ack_rssi);
+ DEBUGFS_DEL(last_ack_ms);
+ DEBUGFS_DEL(inactive_ms);
+ DEBUGFS_DEL(last_seq_ctrl);
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+ DEBUGFS_DEL(wme_rx_queue);
+ DEBUGFS_DEL(wme_tx_queue);
+#endif
+
+ debugfs_remove(sta->debugfs.dir);
+ sta->debugfs.dir = NULL;
+}
diff --git a/net/mac80211/debugfs_sta.h b/net/mac80211/debugfs_sta.h
new file mode 100644
index 000000000000..574a1cd54b96
--- /dev/null
+++ b/net/mac80211/debugfs_sta.h
@@ -0,0 +1,12 @@
+#ifndef __MAC80211_DEBUGFS_STA_H
+#define __MAC80211_DEBUGFS_STA_H
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+void ieee80211_sta_debugfs_add(struct sta_info *sta);
+void ieee80211_sta_debugfs_remove(struct sta_info *sta);
+#else /* !CONFIG_MAC80211_DEBUGFS: empty stubs so callers need no #ifdef */
+static inline void ieee80211_sta_debugfs_add(struct sta_info *sta) {}
+static inline void ieee80211_sta_debugfs_remove(struct sta_info *sta) {}
+#endif /* CONFIG_MAC80211_DEBUGFS */
+
+#endif /* __MAC80211_DEBUGFS_STA_H */
diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h
new file mode 100644
index 000000000000..34fa128e9872
--- /dev/null
+++ b/net/mac80211/hostapd_ioctl.h
@@ -0,0 +1,108 @@
+/*
+ * Host AP (software wireless LAN access point) user space daemon for
+ * Host AP kernel driver
+ * Copyright 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright 2002-2004, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef HOSTAPD_IOCTL_H
+#define HOSTAPD_IOCTL_H
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#endif /* __KERNEL__ */
+
+#define PRISM2_IOCTL_PRISM2_PARAM (SIOCIWFIRSTPRIV + 0)
+#define PRISM2_IOCTL_GET_PRISM2_PARAM (SIOCIWFIRSTPRIV + 1)
+#define PRISM2_IOCTL_HOSTAPD (SIOCIWFIRSTPRIV + 3)
+
+/* PRISM2_IOCTL_PRISM2_PARAM ioctl() subtypes:
+ * This table is no longer added to, the whole sub-ioctl
+ * mess shall be deleted completely. */
+enum {
+ PRISM2_PARAM_IEEE_802_1X = 23,
+ PRISM2_PARAM_ANTSEL_TX = 24,
+ PRISM2_PARAM_ANTSEL_RX = 25,
+
+ /* Instant802 additions */
+ PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES = 1001,
+ PRISM2_PARAM_DROP_UNENCRYPTED = 1002,
+ PRISM2_PARAM_PREAMBLE = 1003,
+ PRISM2_PARAM_SHORT_SLOT_TIME = 1006,
+ PRISM2_PARAM_NEXT_MODE = 1008,
+ PRISM2_PARAM_CLEAR_KEYS = 1009,
+ PRISM2_PARAM_RADIO_ENABLED = 1010,
+ PRISM2_PARAM_ANTENNA_MODE = 1013,
+ PRISM2_PARAM_STAT_TIME = 1016,
+ PRISM2_PARAM_STA_ANTENNA_SEL = 1017,
+ PRISM2_PARAM_FORCE_UNICAST_RATE = 1018,
+ PRISM2_PARAM_RATE_CTRL_NUM_UP = 1019,
+ PRISM2_PARAM_RATE_CTRL_NUM_DOWN = 1020,
+ PRISM2_PARAM_MAX_RATECTRL_RATE = 1021,
+ PRISM2_PARAM_TX_POWER_REDUCTION = 1022,
+ PRISM2_PARAM_KEY_TX_RX_THRESHOLD = 1024,
+ PRISM2_PARAM_DEFAULT_WEP_ONLY = 1026,
+ PRISM2_PARAM_WIFI_WME_NOACK_TEST = 1033,
+ PRISM2_PARAM_SCAN_FLAGS = 1035,
+ PRISM2_PARAM_HW_MODES = 1036,
+ PRISM2_PARAM_CREATE_IBSS = 1037,
+ PRISM2_PARAM_WMM_ENABLED = 1038,
+ PRISM2_PARAM_MIXED_CELL = 1039,
+ PRISM2_PARAM_RADAR_DETECT = 1043,
+ PRISM2_PARAM_SPECTRUM_MGMT = 1044,
+};
+
+enum {
+ IEEE80211_KEY_MGMT_NONE = 0,
+ IEEE80211_KEY_MGMT_IEEE8021X = 1,
+ IEEE80211_KEY_MGMT_WPA_PSK = 2,
+ IEEE80211_KEY_MGMT_WPA_EAP = 3,
+};
+
+
+/* Data structures used for get_hw_features ioctl */
+struct hostapd_ioctl_hw_modes_hdr {
+ int mode;
+ int num_channels;
+ int num_rates;
+};
+
+struct ieee80211_channel_data {
+ short chan; /* channel number (IEEE 802.11) */
+ short freq; /* frequency in MHz */
+ int flag; /* flag for hostapd use (IEEE80211_CHAN_*) */
+};
+
+struct ieee80211_rate_data {
+ int rate; /* rate in 100 kbps */
+ int flags; /* IEEE80211_RATE_ flags */
+};
+
+
+/*
+ * ADD_IF, REMOVE_IF, and UPDATE_IF 'type' argument.
+ * The structs that follow are presumably the per-type parameter blocks for
+ * these values (WDS, VLAN, BSS, STA) - confirm against the ioctl handler.
+ */
+enum {
+ HOSTAP_IF_WDS = 1, HOSTAP_IF_VLAN = 2, HOSTAP_IF_BSS = 3,
+ HOSTAP_IF_STA = 4
+};
+
+struct hostapd_if_wds {
+ u8 remote_addr[ETH_ALEN];
+};
+
+struct hostapd_if_vlan {
+ u8 id;
+};
+
+struct hostapd_if_bss {
+ u8 bssid[ETH_ALEN];
+};
+
+struct hostapd_if_sta { /* no members; an empty struct is a GNU C extension */
+};
+
+#endif /* HOSTAPD_IOCTL_H */
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
new file mode 100644
index 000000000000..6e36df67f8d5
--- /dev/null
+++ b/net/mac80211/ieee80211.c
@@ -0,0 +1,4984 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <net/mac80211.h>
+#include <net/ieee80211_radiotap.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/wireless.h>
+#include <linux/rtnetlink.h>
+#include <net/iw_handler.h>
+#include <linux/compiler.h>
+#include <linux/bitmap.h>
+#include <net/cfg80211.h>
+
+#include "ieee80211_common.h"
+#include "ieee80211_i.h"
+#include "ieee80211_rate.h"
+#include "wep.h"
+#include "wpa.h"
+#include "tkip.h"
+#include "wme.h"
+#include "aes_ccm.h"
+#include "ieee80211_led.h"
+#include "ieee80211_cfg.h"
+#include "debugfs.h"
+#include "debugfs_netdev.h"
+#include "debugfs_key.h"
+
+/* privid for wiphys to determine whether they belong to us or not */
+void *mac80211_wiphy_privid = &mac80211_wiphy_privid;
+
+/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
+/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
+static const unsigned char rfc1042_header[] =
+ { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };
+
+/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */
+static const unsigned char bridge_tunnel_header[] =
+ { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
+
+/*
+ * No encapsulation header if EtherType < 0x600 (=length).
+ * The array below is the same six bytes as rfc1042_header followed by
+ * 0x88 0x8e; ieee80211_is_eapol() compares it against the bytes that
+ * follow the 802.11 header to recognise EAPOL frames.
+ */
+static const unsigned char eapol_header[] =
+ { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e };
+
+/*
+ * Write the interface's current sequence counter into hdr->seq_ctrl
+ * (little endian) and advance the counter for the next frame.  The counter
+ * moves in steps of 0x10 and is wrapped with the IEEE80211_SCTL_SEQ mask;
+ * presumably the low four bits are left free for the fragment number
+ * (IEEE80211_SCTL_FRAG) - confirm against the mask definitions.
+ */
+static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_hdr *hdr)
+{
+ /* Set the sequence number for this frame. */
+ hdr->seq_ctrl = cpu_to_le16(sdata->sequence);
+
+ /* Increase the sequence number. */
+ sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ;
+}
+
+/*
+ * Build a freshly allocated (GFP_ATOMIC) ieee80211_key_conf describing
+ * @data, including a copy of the key material.  Returns NULL when the
+ * allocation fails; the caller owns the returned memory.
+ */
+struct ieee80211_key_conf *
+ieee80211_key_data2conf(struct ieee80211_local *local,
+			const struct ieee80211_key *data)
+{
+	struct ieee80211_key_conf *conf =
+		kmalloc(sizeof(*conf) + data->keylen, GFP_ATOMIC);
+
+	if (conf == NULL)
+		return NULL;
+
+	/* plain field copies */
+	conf->hw_key_idx = data->hw_key_idx;
+	conf->alg = data->alg;
+	conf->keylen = data->keylen;
+	conf->keyidx = data->keyidx;
+	memcpy(conf->key, data->key, data->keylen);
+
+	/* flags derived from the key and from the device configuration */
+	conf->flags = 0;
+	if (data->force_sw_encrypt)
+		conf->flags |= IEEE80211_KEY_FORCE_SW_ENCRYPT;
+	if (data->default_tx_key)
+		conf->flags |= IEEE80211_KEY_DEFAULT_TX_KEY;
+	if (local->default_wep_only)
+		conf->flags |= IEEE80211_KEY_DEFAULT_WEP_ONLY;
+
+	return conf;
+}
+
+/*
+ * Allocate a zeroed key structure with room for key_len bytes of key
+ * material and initialise its reference count.  @sdata and @idx are not
+ * used here.  Returns NULL when the allocation fails.
+ */
+struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata,
+					  int idx, size_t key_len, gfp_t flags)
+{
+	struct ieee80211_key *key = kzalloc(sizeof(*key) + key_len, flags);
+
+	if (key)
+		kref_init(&key->kref);
+
+	return key;
+}
+
+/*
+ * kref release callback: frees the CCMP transform when the key uses
+ * ALG_CCMP, removes the key's debugfs entries and frees the key itself.
+ */
+static void ieee80211_key_release(struct kref *kref)
+{
+	struct ieee80211_key *key =
+		container_of(kref, struct ieee80211_key, kref);
+
+	if (key->alg == ALG_CCMP)
+		ieee80211_aes_key_free(key->u.ccmp.tfm);
+
+	ieee80211_debugfs_key_remove(key);
+	kfree(key);
+}
+
+/* Drop one reference on @key; a NULL key is accepted and ignored. */
+void ieee80211_key_free(struct ieee80211_key *key)
+{
+	if (!key)
+		return;
+
+	kref_put(&key->kref, ieee80211_key_release);
+}
+
+/*
+ * Return 1 when @rate occurs in @rate_list, 0 otherwise.  The list is
+ * terminated by a negative entry; a NULL list matches nothing.
+ */
+static int rate_list_match(const int *rate_list, int rate)
+{
+	const int *entry;
+
+	if (!rate_list)
+		return 0;
+
+	for (entry = rate_list; *entry >= 0; entry++) {
+		if (*entry == rate)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Recompute the flags of every rate in @mode.
+ *
+ * SUPPORTED and BASIC are cleared first (MANDATORY and ERP are only ever
+ * set here, never cleared).  When local->supp_rates[] is configured for
+ * the mode, rates missing from that list are skipped without further
+ * changes.  Every other rate is marked SUPPORTED; BASIC comes from
+ * local->basic_rates[] when configured, otherwise from the built-in
+ * per-mode defaults; MANDATORY and ERP are derived from the mode and the
+ * rate value.
+ */
+void ieee80211_prepare_rates(struct ieee80211_local *local,
+ struct ieee80211_hw_mode *mode)
+{
+ int i;
+
+ for (i = 0; i < mode->num_rates; i++) {
+ struct ieee80211_rate *rate = &mode->rates[i];
+
+ rate->flags &= ~(IEEE80211_RATE_SUPPORTED |
+ IEEE80211_RATE_BASIC);
+
+ if (local->supp_rates[mode->mode]) {
+ if (!rate_list_match(local->supp_rates[mode->mode],
+ rate->rate))
+ continue;
+ }
+
+ rate->flags |= IEEE80211_RATE_SUPPORTED;
+
+ /* Use configured basic rate set if it is available. If not,
+ * use defaults that are sane for most cases. */
+ if (local->basic_rates[mode->mode]) {
+ if (rate_list_match(local->basic_rates[mode->mode],
+ rate->rate))
+ rate->flags |= IEEE80211_RATE_BASIC;
+ } else switch (mode->mode) {
+ case MODE_IEEE80211A:
+ if (rate->rate == 60 || rate->rate == 120 ||
+ rate->rate == 240)
+ rate->flags |= IEEE80211_RATE_BASIC;
+ break;
+ case MODE_IEEE80211B:
+ if (rate->rate == 10 || rate->rate == 20)
+ rate->flags |= IEEE80211_RATE_BASIC;
+ break;
+ case MODE_ATHEROS_TURBO:
+ if (rate->rate == 120 || rate->rate == 240 ||
+ rate->rate == 480)
+ rate->flags |= IEEE80211_RATE_BASIC;
+ break;
+ case MODE_IEEE80211G:
+ if (rate->rate == 10 || rate->rate == 20 ||
+ rate->rate == 55 || rate->rate == 110)
+ rate->flags |= IEEE80211_RATE_BASIC;
+ break;
+ }
+
+ /* Set ERP and MANDATORY flags based on phymode */
+ switch (mode->mode) {
+ case MODE_IEEE80211A:
+ if (rate->rate == 60 || rate->rate == 120 ||
+ rate->rate == 240)
+ rate->flags |= IEEE80211_RATE_MANDATORY;
+ break;
+ case MODE_IEEE80211B:
+ if (rate->rate == 10)
+ rate->flags |= IEEE80211_RATE_MANDATORY;
+ break;
+ case MODE_ATHEROS_TURBO: /* no mandatory rates are flagged for this mode */
+ break;
+ case MODE_IEEE80211G:
+ if (rate->rate == 10 || rate->rate == 20 ||
+ rate->rate == 55 || rate->rate == 110 ||
+ rate->rate == 60 || rate->rate == 120 ||
+ rate->rate == 240)
+ rate->flags |= IEEE80211_RATE_MANDATORY;
+ break;
+ }
+ if (ieee80211_is_erp_rate(mode->mode, rate->rate))
+ rate->flags |= IEEE80211_RATE_ERP;
+ }
+}
+
+/*
+ * Report that a key's tx_rx_count passed the configured threshold.
+ *
+ * Builds an ieee80211_msg_key_notification (count, interface name and the
+ * station address, or ff:ff:ff:ff:ff:ff when @sta is NULL), resets
+ * key->tx_rx_count to 0 and passes the message to ieee80211_rx_mgmt().
+ * Nothing is sent, and the counter is not reset, when local->apdev is
+ * unset or the skb allocation fails.
+ *
+ * NOTE(review): only tx_rx_count, ifname and addr are written; any other
+ * member of the message struct keeps whatever the fresh skb contained -
+ * confirm the struct has no further members.
+ */
+static void ieee80211_key_threshold_notify(struct net_device *dev,
+ struct ieee80211_key *key,
+ struct sta_info *sta)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct sk_buff *skb;
+ struct ieee80211_msg_key_notification *msg;
+
+ /* if no one will get it anyway, don't even allocate it.
+ * unlikely because this is only relevant for APs
+ * where the device must be open... */
+ if (unlikely(!local->apdev))
+ return;
+
+ skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) +
+ sizeof(struct ieee80211_msg_key_notification));
+ if (!skb)
+ return;
+
+ skb_reserve(skb, sizeof(struct ieee80211_frame_info));
+ msg = (struct ieee80211_msg_key_notification *)
+ skb_put(skb, sizeof(struct ieee80211_msg_key_notification));
+ msg->tx_rx_count = key->tx_rx_count;
+ memcpy(msg->ifname, dev->name, IFNAMSIZ);
+ if (sta)
+ memcpy(msg->addr, sta->addr, ETH_ALEN);
+ else
+ memset(msg->addr, 0xff, ETH_ALEN);
+
+ key->tx_rx_count = 0;
+
+ ieee80211_rx_mgmt(local, skb, NULL,
+ ieee80211_msg_key_threshold_notification);
+}
+
+
+/*
+ * Return a pointer to the BSSID inside the 802.11 header, or NULL when the
+ * frame is shorter than 24 bytes or carries no BSSID:
+ *   management frames        -> addr3
+ *   PS-Poll control frames   -> addr1 (other control frames: NULL)
+ *   data, ToDS only          -> addr1
+ *   data, FromDS only        -> addr2
+ *   data, neither            -> addr3
+ *   data, ToDS and FromDS    -> NULL
+ */
+static u8 * ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len)
+{
+	u16 fc, ftype, ds;
+
+	if (len < 24)
+		return NULL;
+
+	fc = le16_to_cpu(hdr->frame_control);
+	ftype = fc & IEEE80211_FCTL_FTYPE;
+
+	if (ftype == IEEE80211_FTYPE_MGMT)
+		return hdr->addr3;
+
+	if (ftype == IEEE80211_FTYPE_CTL) {
+		if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)
+			return hdr->addr1;
+		return NULL;
+	}
+
+	if (ftype != IEEE80211_FTYPE_DATA)
+		return NULL;
+
+	ds = fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS);
+	if (ds == IEEE80211_FCTL_TODS)
+		return hdr->addr1;
+	if (ds == IEEE80211_FCTL_FROMDS)
+		return hdr->addr2;
+	if (ds == 0)
+		return hdr->addr3;
+
+	/* both ToDS and FromDS set */
+	return NULL;
+}
+
+/*
+ * Return the 802.11 header length in bytes for frame control value @fc
+ * (host byte order): 10 or 16 for control frames, 24 or 30 (plus 2 with
+ * QoS control) for data frames, 24 for everything else.
+ */
+int ieee80211_get_hdrlen(u16 fc)
+{
+	const u16 ftype = fc & IEEE80211_FCTL_FTYPE;
+	int hdrlen;
+
+	if (ftype == IEEE80211_FTYPE_CTL) {
+		/*
+		 * ACK and CTS are 10 bytes, all others 16. To see how
+		 * to get this condition consider
+		 *   subtype mask:   0b0000000011110000 (0x00F0)
+		 *   ACK subtype:    0b0000000011010000 (0x00D0)
+		 *   CTS subtype:    0b0000000011000000 (0x00C0)
+		 *   bits that matter:         ^^^      (0x00E0)
+		 *   value of those: 0b0000000011000000 (0x00C0)
+		 */
+		return ((fc & 0xE0) == 0xC0) ? 10 : 16;
+	}
+
+	if (ftype != IEEE80211_FTYPE_DATA)
+		return 24;
+
+	/* four-address data frames carry Addr4 */
+	if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS))
+		hdrlen = 30;
+	else
+		hdrlen = 24;
+
+	/*
+	 * The QoS Control field is two bytes and its presence is
+	 * indicated by the IEEE80211_STYPE_QOS_DATA bit.  Masking out
+	 * that bit and shifting it down to bit position 1 yields 0 or 2,
+	 * which is added to the header length.
+	 */
+	hdrlen += (fc & IEEE80211_STYPE_QOS_DATA)
+			>> (ilog2(IEEE80211_STYPE_QOS_DATA)-1);
+
+	return hdrlen;
+}
+EXPORT_SYMBOL(ieee80211_get_hdrlen);
+
+/*
+ * Return the 802.11 header length of the frame in @skb, or 0 when the skb
+ * holds fewer than 10 bytes or is shorter than the header it announces.
+ */
+int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
+{
+	const struct ieee80211_hdr *hdr;
+	int hdrlen;
+
+	if (unlikely(skb->len < 10))
+		return 0;
+
+	hdr = (const struct ieee80211_hdr *) skb->data;
+	hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control));
+
+	return unlikely(hdrlen > skb->len) ? 0 : hdrlen;
+}
+EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb);
+
+/*
+ * Return the it_len field of the radiotap header at the start of @skb,
+ * converted from little endian.  The skb length is not checked here.
+ */
+static int ieee80211_get_radiotap_len(struct sk_buff *skb)
+{
+	struct ieee80211_radiotap_header *rthdr;
+
+	rthdr = (struct ieee80211_radiotap_header *) skb->data;
+
+	return le16_to_cpu(rthdr->it_len);
+}
+
+#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP
+/*
+ * Log frame control, duration and as many address fields as the header
+ * (clamped to the skb length) contains, prefixed by @ifname and @title.
+ * Frames shorter than 4 bytes only get the length line.
+ */
+static void ieee80211_dump_frame(const char *ifname, const char *title,
+				 const struct sk_buff *skb)
+{
+	const struct ieee80211_hdr *hdr =
+		(const struct ieee80211_hdr *) skb->data;
+	u16 fc;
+	int hdrlen;
+
+	printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len);
+	if (skb->len < 4) {
+		printk("\n");
+		return;
+	}
+
+	fc = le16_to_cpu(hdr->frame_control);
+	hdrlen = ieee80211_get_hdrlen(fc);
+	if (hdrlen > skb->len)
+		hdrlen = skb->len;
+	if (hdrlen >= 4)
+		printk(" FC=0x%04x DUR=0x%04x",
+		       fc, le16_to_cpu(hdr->duration_id));
+	if (hdrlen >= 10)
+		printk(" A1=" MAC_FMT, MAC_ARG(hdr->addr1));
+	if (hdrlen >= 16)
+		printk(" A2=" MAC_FMT, MAC_ARG(hdr->addr2));
+	if (hdrlen >= 24)
+		printk(" A3=" MAC_FMT, MAC_ARG(hdr->addr3));
+	if (hdrlen >= 30)
+		printk(" A4=" MAC_FMT, MAC_ARG(hdr->addr4));
+	printk("\n");
+}
+#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */
+/* No-op stub; takes the same const-qualified prototype as the real one. */
+static inline void ieee80211_dump_frame(const char *ifname, const char *title,
+					const struct sk_buff *skb)
+{
+}
+#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */
+
+
+/*
+ * Return 1 when @skb is a frame with a data payload whose bytes right
+ * after the 802.11 header equal eapol_header, 0 otherwise.
+ */
+static int ieee80211_is_eapol(const struct sk_buff *skb)
+{
+	const struct ieee80211_hdr *hdr;
+	int hdrlen;
+	u16 fc;
+
+	if (unlikely(skb->len < 10))
+		return 0;
+
+	hdr = (const struct ieee80211_hdr *) skb->data;
+	fc = le16_to_cpu(hdr->frame_control);
+	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
+		return 0;
+
+	hdrlen = ieee80211_get_hdrlen(fc);
+
+	/* too short to hold the LLC/SNAP + EtherType bytes */
+	if (skb->len < hdrlen + sizeof(eapol_header))
+		return 0;
+
+	return memcmp(skb->data + hdrlen, eapol_header,
+		      sizeof(eapol_header)) == 0;
+}
+
+
+/*
+ * TX handler: ask the rate control algorithm for the transmit rate and
+ * fill in the rate related fields of the TX control structure.
+ *
+ * When the algorithm supplies a probe rate, the frame is sent at the probe
+ * rate and the regularly selected rate becomes the alternate retry rate.
+ * For fragmented frames in 802.11g mode with CTS protection of ERP frames
+ * enabled, the non-ERP rate (if given) is used instead and the original
+ * choice is kept as last_frag_rate.
+ *
+ * Returns TXRX_DROP when no rate is available, TXRX_CONTINUE otherwise.
+ */
+static ieee80211_txrx_result
+ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx)
+{
+	struct rate_control_extra extra;
+
+	memset(&extra, 0, sizeof(extra));
+	extra.mode = tx->u.tx.mode;
+	extra.mgmt_data = tx->sdata &&
+		tx->sdata->type == IEEE80211_IF_TYPE_MGMT;
+	extra.ethertype = tx->ethertype;
+
+	tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev, tx->skb,
+					      &extra);
+	if (unlikely(extra.probe != NULL)) {
+		/*
+		 * The regular rate is dereferenced below to become the
+		 * retry rate, so it has to be checked before use rather
+		 * than after the probe rate has replaced it.
+		 */
+		if (!tx->u.tx.rate)
+			return TXRX_DROP;
+
+		tx->u.tx.control->flags |= IEEE80211_TXCTL_RATE_CTRL_PROBE;
+		tx->u.tx.probe_last_frag = 1;
+		tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val;
+		tx->u.tx.rate = extra.probe;
+	} else {
+		tx->u.tx.control->alt_retry_rate = -1;
+	}
+	if (!tx->u.tx.rate)
+		return TXRX_DROP;
+	if (tx->u.tx.mode->mode == MODE_IEEE80211G &&
+	    tx->local->cts_protect_erp_frames && tx->fragmented &&
+	    extra.nonerp) {
+		tx->u.tx.last_frag_rate = tx->u.tx.rate;
+		tx->u.tx.probe_last_frag = extra.probe ? 1 : 0;
+
+		tx->u.tx.rate = extra.nonerp;
+		tx->u.tx.control->rate = extra.nonerp;
+		tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE;
+	} else {
+		tx->u.tx.last_frag_rate = tx->u.tx.rate;
+		tx->u.tx.control->rate = tx->u.tx.rate;
+	}
+	tx->u.tx.control->tx_rate = tx->u.tx.rate->val;
+	/* switch to the rate's second value when short preamble applies */
+	if ((tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) &&
+	    tx->local->short_preamble &&
+	    (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) {
+		tx->u.tx.short_preamble = 1;
+		tx->u.tx.control->tx_rate = tx->u.tx.rate->val2;
+	}
+
+	return TXRX_CONTINUE;
+}
+
+
+/*
+ * TX handler: choose the encryption key for the frame.
+ *
+ * Order of preference: none when the frame is flagged DO_NOT_ENCRYPT, the
+ * station's key, the interface default key.  Without any key the frame is
+ * dropped if the interface demands encryption, unless EAPOL frames are
+ * exempted and this is one.  A selected key has its tx_rx_count bumped and
+ * a threshold notification is sent once the configured limit is exceeded.
+ */
+static ieee80211_txrx_result
+ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
+{
+	struct ieee80211_sub_if_data *sdata = tx->sdata;
+
+	if (tx->sta)
+		tx->u.tx.control->key_idx = tx->sta->key_idx_compression;
+	else
+		tx->u.tx.control->key_idx = HW_KEY_IDX_INVALID;
+
+	if (unlikely(tx->u.tx.control->flags &
+		     IEEE80211_TXCTL_DO_NOT_ENCRYPT)) {
+		tx->key = NULL;
+	} else if (tx->sta && tx->sta->key) {
+		tx->key = tx->sta->key;
+	} else if (sdata->default_key) {
+		tx->key = sdata->default_key;
+	} else if (sdata->drop_unencrypted &&
+		   !(sdata->eapol && ieee80211_is_eapol(tx->skb))) {
+		I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
+		return TXRX_DROP;
+	} else {
+		tx->key = NULL;
+	}
+
+	if (!tx->key)
+		return TXRX_CONTINUE;
+
+	tx->key->tx_rx_count++;
+	if (unlikely(tx->local->key_tx_rx_threshold &&
+		     tx->key->tx_rx_count > tx->local->key_tx_rx_threshold))
+		ieee80211_key_threshold_notify(tx->dev, tx->key, tx->sta);
+
+	return TXRX_CONTINUE;
+}
+
+/*
+ * TX handler: split tx->skb into fragments when tx->fragmented is set.
+ *
+ * Each fragment carries at most
+ *   per_fragm = fragmentation_threshold - hdrlen - FCS_LEN
+ * payload bytes.  The first fragment stays in tx->skb (trimmed, with
+ * MOREFRAGS set); the other num_fragm - 1 fragments are new skbs that get
+ * a copy of the header, their own fragment number and the next slice of
+ * payload.  They are handed on in tx->u.tx.extra_frag and
+ * tx->u.tx.num_extra_frag.  The last fragment has MOREFRAGS cleared.
+ *
+ * Returns TXRX_CONTINUE on success or when nothing needs fragmenting, and
+ * TXRX_DROP, after freeing the fragments built so far, on failure.
+ *
+ * NOTE(review): left = payload_len - per_fragm is unsigned arithmetic and
+ * relies on payload_len > per_fragm whenever tx->fragmented is set -
+ * confirm against the code that sets tx->fragmented.
+ */
+static ieee80211_txrx_result
+ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
+ size_t hdrlen, per_fragm, num_fragm, payload_len, left;
+ struct sk_buff **frags, *first, *frag;
+ int i;
+ u16 seq;
+ u8 *pos;
+ int frag_threshold = tx->local->fragmentation_threshold;
+
+ if (!tx->fragmented)
+ return TXRX_CONTINUE;
+
+ first = tx->skb;
+
+ hdrlen = ieee80211_get_hdrlen(tx->fc);
+ payload_len = first->len - hdrlen;
+ per_fragm = frag_threshold - hdrlen - FCS_LEN;
+ num_fragm = (payload_len + per_fragm - 1) / per_fragm; /* round up */
+
+ frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC);
+ if (!frags)
+ goto fail;
+
+ hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS);
+ seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ;
+ pos = first->data + hdrlen + per_fragm; /* payload of the 2nd fragment */
+ left = payload_len - per_fragm;
+ for (i = 0; i < num_fragm - 1; i++) {
+ struct ieee80211_hdr *fhdr;
+ size_t copylen;
+
+ if (left <= 0) /* left is size_t, so this only triggers at exactly 0 */
+ goto fail;
+
+ /* reserve enough extra head and tail room for possible
+ * encryption */
+ frag = frags[i] =
+ dev_alloc_skb(tx->local->hw.extra_tx_headroom +
+ frag_threshold +
+ IEEE80211_ENCRYPT_HEADROOM +
+ IEEE80211_ENCRYPT_TAILROOM);
+ if (!frag)
+ goto fail;
+ /* Make sure that all fragments use the same priority so
+ * that they end up using the same TX queue */
+ frag->priority = first->priority;
+ skb_reserve(frag, tx->local->hw.extra_tx_headroom +
+ IEEE80211_ENCRYPT_HEADROOM);
+ fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen);
+ memcpy(fhdr, first->data, hdrlen);
+ if (i == num_fragm - 2) /* last fragment */
+ fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS);
+ fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG));
+ copylen = left > per_fragm ? per_fragm : left;
+ memcpy(skb_put(frag, copylen), pos, copylen);
+
+ pos += copylen;
+ left -= copylen;
+ }
+ skb_trim(first, hdrlen + per_fragm);
+
+ tx->u.tx.num_extra_frag = num_fragm - 1;
+ tx->u.tx.extra_frag = frags;
+
+ return TXRX_CONTINUE;
+
+ fail:
+ printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name);
+ if (frags) {
+ for (i = 0; i < num_fragm - 1; i++)
+ if (frags[i])
+ dev_kfree_skb(frags[i]);
+ kfree(frags);
+ }
+ I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment);
+ return TXRX_DROP;
+}
+
+
+/*
+ * WEP-protect one skb with tx->key.  With force_sw_encrypt the frame is
+ * encrypted in software; otherwise the hardware key index is stored in the
+ * TX control and, when the hardware wants the IV included, the IV is
+ * added.  Returns 0 on success, -1 on failure.
+ */
+static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb)
+{
+	if (tx->key->force_sw_encrypt)
+		return ieee80211_wep_encrypt(tx->local, skb, tx->key) ? -1 : 0;
+
+	tx->u.tx.control->key_idx = tx->key->hw_key_idx;
+
+	if ((tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) &&
+	    ieee80211_wep_add_iv(tx->local, skb, tx->key) == NULL)
+		return -1;
+
+	return 0;
+}
+
+
+/*
+ * Set the Protected bit in the frame control field of tx->skb and of
+ * every extra fragment.
+ */
+void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
+	int n;
+
+	hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+
+	if (!tx->u.tx.extra_frag)
+		return;
+
+	for (n = 0; n < tx->u.tx.num_extra_frag; n++) {
+		hdr = (struct ieee80211_hdr *) tx->u.tx.extra_frag[n]->data;
+		hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+	}
+}
+
+
+/*
+ * TX handler: apply WEP to the frame and all of its extra fragments.
+ *
+ * Only acts when a WEP key was selected and the frame is either a data
+ * frame or an authentication management frame; everything else passes
+ * through untouched.  Returns TXRX_DROP when any fragment fails.
+ */
+static ieee80211_txrx_result
+ieee80211_tx_h_wep_encrypt(struct ieee80211_txrx_data *tx)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
+	u16 fc = le16_to_cpu(hdr->frame_control);
+	u16 ftype = fc & IEEE80211_FCTL_FTYPE;
+	int is_data, is_auth, i;
+
+	if (!tx->key || tx->key->alg != ALG_WEP)
+		return TXRX_CONTINUE;
+
+	is_data = ftype == IEEE80211_FTYPE_DATA;
+	is_auth = ftype == IEEE80211_FTYPE_MGMT &&
+		  (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_AUTH;
+	if (!is_data && !is_auth)
+		return TXRX_CONTINUE;
+
+	tx->u.tx.control->iv_len = WEP_IV_LEN;
+	tx->u.tx.control->icv_len = WEP_ICV_LEN;
+	ieee80211_tx_set_iswep(tx);
+
+	if (wep_encrypt_skb(tx, tx->skb) < 0)
+		goto drop;
+
+	if (!tx->u.tx.extra_frag)
+		return TXRX_CONTINUE;
+
+	for (i = 0; i < tx->u.tx.num_extra_frag; i++) {
+		if (wep_encrypt_skb(tx, tx->u.tx.extra_frag[i]) < 0)
+			goto drop;
+	}
+
+	return TXRX_CONTINUE;
+
+ drop:
+	I802_DEBUG_INC(tx->local->tx_handlers_drop_wep);
+	return TXRX_DROP;
+}
+
+/*
+ * Return the time in microseconds, SIFS included, needed to send a frame
+ * of @len bytes (FCS excluded) at @rate.  The OFDM formula is used when
+ * the configured phymode is 802.11a or Atheros Turbo, or when @erp is
+ * non-zero; otherwise the 802.11b formula is used, where @short_preamble
+ * selects the shorter preamble and PLCP header times.
+ */
+static int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
+ int rate, int erp, int short_preamble)
+{
+ int dur;
+
+ /* calculate duration (in microseconds, rounded up to next higher
+ * integer if it includes a fractional microsecond) to send frame of
+ * len bytes (does not include FCS) at the given rate. Duration will
+ * also include SIFS.
+ *
+ * rate is in 100 kbps, so dividend is multiplied by 10 in the
+ * DIV_ROUND_UP() operations.
+ */
+
+ if (local->hw.conf.phymode == MODE_IEEE80211A || erp ||
+ local->hw.conf.phymode == MODE_ATHEROS_TURBO) {
+ /*
+ * OFDM:
+ *
+ * N_DBPS = DATARATE x 4
+ * N_SYM = Ceiling((16+8xLENGTH+6) / N_DBPS)
+ * (16 = SIGNAL time, 6 = tail bits)
+ * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext
+ *
+ * T_SYM = 4 usec
+ * 802.11a - 17.5.2: aSIFSTime = 16 usec
+ * 802.11g - 19.8.4: aSIFSTime = 10 usec +
+ * signal ext = 6 usec
+ */
+ /* FIX: Atheros Turbo may have different (shorter) duration? */
+ dur = 16; /* SIFS + signal ext */
+ dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */
+ dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */
+ dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10,
+ 4 * rate); /* T_SYM x N_SYM */
+ } else {
+ /*
+ * 802.11b or 802.11g with 802.11b compatibility:
+ * 18.3.4: TXTIME = PreambleLength + PLCPHeaderTime +
+ * Ceiling(((LENGTH+PBCC)x8)/DATARATE). PBCC=0.
+ *
+ * 802.11 (DS): 15.3.3, 802.11b: 18.3.4
+ * aSIFSTime = 10 usec
+ * aPreambleLength = 144 usec or 72 usec with short preamble
+ * aPLCPHeaderLength = 48 usec or 24 usec with short preamble
+ */
+ dur = 10; /* aSIFSTime = 10 usec */
+ dur += short_preamble ? (72 + 24) : (144 + 48);
+
+ dur += DIV_ROUND_UP(8 * (len + 4) * 10, rate); /* len + 4: the FCS is added here */
+ }
+
+ return dur;
+}
+
+
+/*
+ * Exported duration function for driver use.
+ *
+ * Returns the duration of a @frame_len byte frame sent at @rate as a
+ * little-endian 16-bit value, using the device's current phymode and short
+ * preamble setting.
+ */
+__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
+					size_t frame_len, int rate)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	int erp = ieee80211_is_erp_rate(hw->conf.phymode, rate);
+	u16 dur = ieee80211_frame_duration(local, frame_len, rate, erp,
+					   local->short_preamble);
+
+	return cpu_to_le16(dur);
+}
+EXPORT_SYMBOL(ieee80211_generic_frame_duration);
+
+
+/*
+ * Compute the value of the Duration field for the frame described by @tx.
+ *
+ * @group_addr:    non-zero when addr1 is a group address (no ACK expected)
+ * @next_frag_len: length of the following fragment, or 0 when there is none
+ *
+ * Rules for data and mgmt frames (except PS Poll):
+ * - during CFP: 32768
+ * - during contention period:
+ *   if addr1 is group address: 0
+ *   if more fragments = 0 and addr1 is individual address: time to
+ *      transmit one ACK plus SIFS
+ *   if more fragments = 1 and addr1 is individual address: time to
+ *      transmit next fragment plus 2 x ACK plus 3 x SIFS
+ *
+ * IEEE 802.11, 9.6:
+ * - control response frame (CTS or ACK) shall be transmitted using the
+ *   same rate as the immediately previous frame in the frame exchange
+ *   sequence, if this rate belongs to the PHY mandatory rates, or else
+ *   at the highest possible rate belonging to the PHY rates in the
+ *   BSSBasicRateSet
+ */
+static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr,
+			      int next_frag_len)
+{
+	struct ieee80211_rate *txrate = tx->u.tx.rate;
+	struct ieee80211_hw_mode *mode = tx->u.tx.mode;
+	struct ieee80211_local *local = tx->local;
+	int erp = txrate->flags & IEEE80211_RATE_ERP;
+	int basic_rate = -1;
+	int mandatory_rate = 10; /* use 1 Mbps if everything fails */
+	int ack_rate, dur, idx;
+
+	if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) {
+		/* TODO: These control frames are not currently sent by
+		 * 80211.o, but should they be implemented, this function
+		 * needs to be updated to support duration field calculation.
+		 *
+		 * RTS: time needed to transmit pending data/mgmt frame plus
+		 *    one CTS frame plus one ACK frame plus 3 x SIFS
+		 * CTS: duration of immediately previous RTS minus time
+		 *    required to transmit CTS and its SIFS
+		 * ACK: 0 if immediately previous directed data/mgmt had
+		 *    more=0, with more=1 duration in ACK frame is duration
+		 *    from previous frame minus time needed to transmit ACK
+		 *    and its SIFS
+		 * PS Poll: BIT(15) | BIT(14) | aid
+		 */
+		return 0;
+	}
+
+	/* data/mgmt */
+	if (0 /* FIX: data/mgmt during CFP */)
+		return 32768;
+
+	/* Group address as the destination - no ACK */
+	if (group_addr)
+		return 0;
+
+	/* Individual destination address:
+	 * IEEE 802.11, Ch. 9.6 (after IEEE 802.11g changes)
+	 * CTS and ACK frames shall be transmitted using the highest rate in
+	 * basic rate set that is less than or equal to the rate of the
+	 * immediately previous frame and that is using the same modulation
+	 * (CCK or OFDM). If no basic rate set matches with these requirements,
+	 * the highest mandatory rate of the PHY that is less than or equal to
+	 * the rate of the previous frame is used.
+	 * Mandatory rates for IEEE 802.11g PHY: 1, 2, 5.5, 11, 6, 12, 24 Mbps
+	 */
+	for (idx = 0; idx < mode->num_rates; idx++) {
+		struct ieee80211_rate *cand = &mode->rates[idx];
+
+		/* stop at the first entry faster than the TX rate */
+		if (cand->rate > txrate->rate)
+			break;
+
+		if (IEEE80211_RATE_MODULATION(cand->flags) ==
+		    IEEE80211_RATE_MODULATION(txrate->flags)) {
+			if (cand->flags & IEEE80211_RATE_BASIC)
+				basic_rate = cand->rate;
+			else if (cand->flags & IEEE80211_RATE_MANDATORY)
+				mandatory_rate = cand->rate;
+		}
+	}
+
+	/* No matching basic rate found: fall back to the highest suitable
+	 * mandatory PHY rate */
+	ack_rate = (basic_rate != -1) ? basic_rate : mandatory_rate;
+
+	/* Time needed to transmit ACK
+	 * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up
+	 * to closest integer */
+	dur = ieee80211_frame_duration(local, 10, ack_rate, erp,
+				       local->short_preamble);
+	if (!next_frag_len)
+		return dur;
+
+	/* Frame is fragmented: duration increases with time needed to
+	 * transmit next fragment plus ACK and 2 x SIFS. */
+	dur = 2 * dur; /* ACK + SIFS */
+	dur += ieee80211_frame_duration(local, next_frag_len,
+					txrate->rate, erp,
+					local->short_preamble);
+
+	return dur;
+}
+
+
+/*
+ * TX handler: fill in per-frame transmit control settings.
+ *
+ * Selects the retry limit and RTS/CTS or CTS-protection flags, writes the
+ * Duration field of the first fragment, picks the rate for RTS/CTS frames
+ * and updates the destination station's TX counters. Always returns
+ * TXRX_CONTINUE.
+ */
+static ieee80211_txrx_result
+ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
+	struct ieee80211_tx_control *control = tx->u.tx.control;
+	struct ieee80211_hw_mode *mode = tx->u.tx.mode;
+	int mcast = is_multicast_ether_addr(hdr->addr1);
+	int next_len;
+	u16 dur;
+
+	if (mcast) {
+		control->retry_limit = 1;
+	} else if (tx->skb->len + FCS_LEN > tx->local->rts_threshold &&
+		   tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD) {
+		control->flags |= IEEE80211_TXCTL_USE_RTS_CTS;
+		control->retry_limit = tx->local->long_retry_limit;
+	} else {
+		control->retry_limit = tx->local->short_retry_limit;
+	}
+
+	/* Do not use multiple retry rates when sending fragmented frames.
+	 * TODO: The last fragment could still use multiple retry rates. */
+	if (tx->fragmented)
+		control->alt_retry_rate = -1;
+
+	/* Use CTS protection for unicast frames sent using extended rates if
+	 * there are associated non-ERP stations and RTS/CTS is not configured
+	 * for the frame. */
+	if (mode->mode == MODE_IEEE80211G &&
+	    (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) &&
+	    tx->u.tx.unicast &&
+	    tx->local->cts_protect_erp_frames &&
+	    !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS))
+		control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT;
+
+	/* Setup duration field for the first fragment of the frame. Duration
+	 * for remaining fragments will be updated when they are being sent
+	 * to low-level driver in ieee80211_tx(). */
+	next_len = tx->fragmented ? tx->u.tx.extra_frag[0]->len : 0;
+	dur = ieee80211_duration(tx, mcast, next_len);
+	hdr->duration_id = cpu_to_le16(dur);
+
+	if (control->flags & (IEEE80211_TXCTL_USE_RTS_CTS |
+			      IEEE80211_TXCTL_USE_CTS_PROTECT)) {
+		struct ieee80211_rate *ctl_rate = tx->u.tx.rate;
+
+		/* Do not use multiple retry rates when using RTS/CTS */
+		control->alt_retry_rate = -1;
+
+		/* Use min(data rate, max base rate) as CTS/RTS rate: walk
+		 * down the rate table until a basic rate (or the first
+		 * entry) is reached */
+		for (; ctl_rate > mode->rates; ctl_rate--) {
+			if (ctl_rate->flags & IEEE80211_RATE_BASIC)
+				break;
+		}
+
+		control->rts_cts_rate = ctl_rate->val;
+		control->rts_rate = ctl_rate;
+	}
+
+	if (!tx->sta)
+		return TXRX_CONTINUE;
+
+	/* Per-station statistics: first fragment, then any extra ones */
+	tx->sta->tx_packets++;
+	tx->sta->tx_fragments++;
+	tx->sta->tx_bytes += tx->skb->len;
+	if (tx->u.tx.extra_frag) {
+		int n;
+
+		tx->sta->tx_fragments += tx->u.tx.num_extra_frag;
+		for (n = 0; n < tx->u.tx.num_extra_frag; n++)
+			tx->sta->tx_bytes += tx->u.tx.extra_frag[n]->len;
+	}
+
+	return TXRX_CONTINUE;
+}
+
+
+/*
+ * TX handler: decide whether the frame may be sent at all.
+ *
+ * Drops everything except probe requests while scanning, data frames to
+ * stations that are not associated (outside IBSS), multicast data when no
+ * station is known, and frames to 802.1X-unauthorized stations that did
+ * not come from the management interface. Frames already marked as
+ * PS-buffered pass straight through.
+ */
+static ieee80211_txrx_result
+ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
+{
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+	int ftype = tx->fc & IEEE80211_FCTL_FTYPE;
+	int stype = tx->fc & IEEE80211_FCTL_STYPE;
+	u32 sta_flags;
+
+	if (unlikely(tx->local->sta_scanning != 0) &&
+	    (ftype != IEEE80211_FTYPE_MGMT ||
+	     stype != IEEE80211_STYPE_PROBE_REQ))
+		return TXRX_DROP;
+
+	if (tx->u.tx.ps_buffered)
+		return TXRX_CONTINUE;
+
+	sta_flags = tx->sta ? tx->sta->flags : 0;
+
+	if (unlikely(!tx->u.tx.unicast)) {
+		/*
+		 * No associated STAs - no need to send multicast
+		 * frames.
+		 */
+		if (unlikely(ftype == IEEE80211_FTYPE_DATA &&
+			     tx->local->num_sta == 0 &&
+			     !tx->local->allow_broadcast_always &&
+			     tx->sdata->type != IEEE80211_IF_TYPE_IBSS))
+			return TXRX_DROP;
+		return TXRX_CONTINUE;
+	}
+
+	if (unlikely(!(sta_flags & WLAN_STA_ASSOC) &&
+		     tx->sdata->type != IEEE80211_IF_TYPE_IBSS &&
+		     ftype == IEEE80211_FTYPE_DATA)) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+		printk(KERN_DEBUG "%s: dropped data frame to not "
+		       "associated station " MAC_FMT "\n",
+		       tx->dev->name, MAC_ARG(hdr->addr1));
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+		I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc);
+		return TXRX_DROP;
+	}
+
+	if (unlikely(!tx->u.tx.mgmt_interface && tx->sdata->ieee802_1x &&
+		     !(sta_flags & WLAN_STA_AUTHORIZED))) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+		printk(KERN_DEBUG "%s: dropped frame to " MAC_FMT
+		       " (unauthorized port)\n", tx->dev->name,
+		       MAC_ARG(hdr->addr1));
+#endif
+		I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port);
+		return TXRX_DROP;
+	}
+
+	return TXRX_CONTINUE;
+}
+
+/*
+ * TX handler: have ieee80211_include_sequence() fill in the sequence
+ * number for frames whose header is at least 24 bytes long. Shorter
+ * headers are left untouched. Always returns TXRX_CONTINUE.
+ */
+static ieee80211_txrx_result
+ieee80211_tx_h_sequence(struct ieee80211_txrx_data *tx)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
+	u16 fc = le16_to_cpu(hdr->frame_control);
+
+	if (ieee80211_get_hdrlen(fc) < 24)
+		return TXRX_CONTINUE;
+
+	ieee80211_include_sequence(tx->sdata, hdr);
+	return TXRX_CONTINUE;
+}
+
+/* Called whenever the AP is about to exceed the maximum limit of buffered
+ * frames for power saving STAs. This should not really happen often during
+ * normal operation, so dropping the oldest buffered packet from each queue
+ * should be OK to make some room for new frames.
+ *
+ * One frame is removed from every AP interface's broadcast PS queue and
+ * from every station's PS queue; local->total_ps_buffered is then reset to
+ * the number of frames that are still queued. */
+static void purge_old_ps_buffers(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta;
+	struct sk_buff *oldest;
+	int remaining = 0, dropped = 0;
+
+	/* broadcast/multicast queues of the AP interfaces */
+	read_lock(&local->sub_if_lock);
+	list_for_each_entry(sdata, &local->sub_if_list, list) {
+		if (sdata->dev != local->mdev &&
+		    sdata->type == IEEE80211_IF_TYPE_AP) {
+			oldest = skb_dequeue(&sdata->u.ap.ps_bc_buf);
+			if (oldest) {
+				dropped++;
+				dev_kfree_skb(oldest);
+			}
+			remaining += skb_queue_len(&sdata->u.ap.ps_bc_buf);
+		}
+	}
+	read_unlock(&local->sub_if_lock);
+
+	/* per-station unicast queues */
+	spin_lock_bh(&local->sta_lock);
+	list_for_each_entry(sta, &local->sta_list, list) {
+		oldest = skb_dequeue(&sta->ps_tx_buf);
+		if (oldest) {
+			dropped++;
+			dev_kfree_skb(oldest);
+		}
+		remaining += skb_queue_len(&sta->ps_tx_buf);
+	}
+	spin_unlock_bh(&local->sta_lock);
+
+	local->total_ps_buffered = remaining;
+	printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n",
+	       local->mdev->name, dropped);
+}
+
+
+/*
+ * Broadcast/multicast half of the power-save buffering TX handler.
+ *
+ * If any of the associated stations is in power save mode, the frame is
+ * buffered to be sent after DTIM beacon frame. Returns TXRX_QUEUED when
+ * the frame was put on the BSS broadcast queue, TXRX_CONTINUE otherwise.
+ */
+static inline ieee80211_txrx_result
+ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx)
+{
+	struct sk_buff_head *bc_queue;
+
+	/* Nothing to buffer unless the host does the buffering, this is not
+	 * a WDS link, the BSS has sleeping stations and the frame is not
+	 * marked with the Order bit */
+	if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) ||
+	    tx->sdata->type == IEEE80211_IF_TYPE_WDS ||
+	    !tx->sdata->bss || !atomic_read(&tx->sdata->bss->num_sta_ps) ||
+	    (tx->fc & IEEE80211_FCTL_ORDER))
+		return TXRX_CONTINUE;
+
+	if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
+		purge_old_ps_buffers(tx->local);
+
+	bc_queue = &tx->sdata->bss->ps_bc_buf;
+	if (skb_queue_len(bc_queue) < AP_MAX_BC_BUFFER) {
+		tx->local->total_ps_buffered++;
+	} else {
+		/* queue is full: the new frame replaces the oldest one, so
+		 * the global count does not change */
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "%s: BC TX buffer full - "
+			       "dropping the oldest frame\n",
+			       tx->dev->name);
+		}
+		dev_kfree_skb(skb_dequeue(bc_queue));
+	}
+	skb_queue_tail(bc_queue, tx->skb);
+
+	return TXRX_QUEUED;
+}
+
+
+/*
+ * Unicast half of the power-save buffering TX handler.
+ *
+ * Frames without a station entry and probe responses are never buffered.
+ * When the destination station has WLAN_STA_PS set and no PS Poll is
+ * pending, the frame is queued on the station's PS buffer (dropping the
+ * oldest entry if that buffer is full) and TXRX_QUEUED is returned;
+ * otherwise the pspoll marker is cleared and TXRX_CONTINUE is returned.
+ */
+static inline ieee80211_txrx_result
+ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx)
+{
+	struct ieee80211_tx_packet_data *pkt_data;
+	struct sta_info *sta = tx->sta;
+
+	if (unlikely(!sta))
+		return TXRX_CONTINUE;
+	if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT &&
+		     (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))
+		return TXRX_CONTINUE;
+
+	if (likely(!(sta->flags & WLAN_STA_PS) || sta->pspoll)) {
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
+		if (unlikely(sta->flags & WLAN_STA_PS)) {
+			printk(KERN_DEBUG "%s: STA " MAC_FMT " in PS mode, but pspoll "
+			       "set -> send frame\n", tx->dev->name,
+			       MAC_ARG(sta->addr));
+		}
+#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
+		sta->pspoll = 0;
+		return TXRX_CONTINUE;
+	}
+
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
+	printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS buffer (entries "
+	       "before %d)\n",
+	       MAC_ARG(sta->addr), sta->aid,
+	       skb_queue_len(&sta->ps_tx_buf));
+#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
+	sta->flags |= WLAN_STA_TIM;
+
+	if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
+		purge_old_ps_buffers(tx->local);
+
+	if (skb_queue_len(&sta->ps_tx_buf) < STA_MAX_TX_BUFFER) {
+		tx->local->total_ps_buffered++;
+	} else {
+		struct sk_buff *oldest = skb_dequeue(&sta->ps_tx_buf);
+
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "%s: STA " MAC_FMT " TX "
+			       "buffer full - dropping oldest frame\n",
+			       tx->dev->name, MAC_ARG(sta->addr));
+		}
+		dev_kfree_skb(oldest);
+	}
+
+	/* Queue frame to be sent after STA sends an PS Poll frame; the TIM
+	 * bit is raised when the queue is empty at this point */
+	if (skb_queue_empty(&sta->ps_tx_buf)) {
+		if (tx->local->ops->set_tim)
+			tx->local->ops->set_tim(local_to_hw(tx->local),
+						sta->aid, 1);
+		if (tx->sdata->bss)
+			bss_tim_set(tx->local, tx->sdata->bss, sta->aid);
+	}
+
+	pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb;
+	pkt_data->jiffies = jiffies;
+	skb_queue_tail(&sta->ps_tx_buf, tx->skb);
+
+	return TXRX_QUEUED;
+}
+
+
+/*
+ * TX handler: power-save buffering. Frames already flagged as
+ * ps_buffered continue unchanged; everything else is dispatched to the
+ * unicast or the multicast buffering helper.
+ */
+static ieee80211_txrx_result
+ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx)
+{
+	if (unlikely(tx->u.tx.ps_buffered))
+		return TXRX_CONTINUE;
+
+	return tx->u.tx.unicast ? ieee80211_tx_h_unicast_ps_buf(tx) :
+				  ieee80211_tx_h_multicast_ps_buf(tx);
+}
+
+
+/*
+ * Reset @tx and fill it, together with @control, from @skb and @dev.
+ *
+ * Looks up the destination station with sta_info_get() (stored in
+ * tx->sta), records the frame control field, decides whether the frame
+ * is unicast and whether it will be fragmented, and sets the NO_ACK,
+ * CLEAR_DST_MASK and FIRST_FRAGMENT control flags, the TX power level
+ * and the TX antenna.
+ */
+static void inline
+__ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
+		       struct sk_buff *skb,
+		       struct net_device *dev,
+		       struct ieee80211_tx_control *control)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	int mcast = is_multicast_ether_addr(hdr->addr1);
+	int hdrlen;
+
+	memset(tx, 0, sizeof(*tx));
+	tx->skb = skb;
+	tx->dev = dev; /* use original interface */
+	tx->local = local;
+	tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	tx->sta = sta_info_get(local, hdr->addr1);
+	tx->fc = le16_to_cpu(hdr->frame_control);
+	tx->u.tx.control = control;
+	tx->u.tx.unicast = !mcast;
+
+	control->power_level = local->hw.conf.power_level;
+
+	/* group-addressed frames are not acknowledged */
+	if (mcast)
+		control->flags |= IEEE80211_TXCTL_NO_ACK;
+	else
+		control->flags &= ~IEEE80211_TXCTL_NO_ACK;
+
+	/* fragment only unicast frames above the threshold, and only when
+	 * the driver has no set_frag_threshold callback */
+	tx->fragmented = local->fragmentation_threshold <
+		IEEE80211_MAX_FRAG_THRESHOLD && tx->u.tx.unicast &&
+		skb->len + FCS_LEN > local->fragmentation_threshold &&
+		(!local->ops->set_frag_threshold);
+
+	if (!tx->sta || tx->sta->clear_dst_mask) {
+		control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK;
+		if (tx->sta)
+			tx->sta->clear_dst_mask = 0;
+	}
+
+	/* per-station antenna overrides the device default unless automatic
+	 * selection is configured */
+	if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta)
+		control->antenna_sel_tx = tx->sta->antenna_sel_tx;
+	else
+		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
+
+	/* pick up the ethertype that follows the rfc1042-sized header */
+	hdrlen = ieee80211_get_hdrlen(tx->fc);
+	if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) {
+		u8 *etype = &skb->data[hdrlen + sizeof(rfc1042_header)];
+
+		tx->ethertype = (etype[0] << 8) | etype[1];
+	}
+
+	control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT;
+}
+
+/* Non-zero when @dev and @master share the same wdev_priv() data. */
+static int inline is_ieee80211_device(struct net_device *dev,
+				      struct net_device *master)
+{
+	const void *dev_priv = wdev_priv(dev->ieee80211_ptr);
+	const void *master_priv = wdev_priv(master->ieee80211_ptr);
+
+	return dev_priv == master_priv;
+}
+
+/* Resolve the originating interface from the ifindex stored in skb->cb
+ * and prepare @tx / @control for it.
+ *
+ * Returns 0 on success, or -ENODEV when the interface does not exist or
+ * does not belong to the same device as @mdev.
+ *
+ * Device in tx->dev has a reference added; use dev_put(tx->dev) when
+ * finished with it. */
+static int inline ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
+				       struct sk_buff *skb,
+				       struct net_device *mdev,
+				       struct ieee80211_tx_control *control)
+{
+	struct ieee80211_tx_packet_data *pkt_data =
+		(struct ieee80211_tx_packet_data *)skb->cb;
+	struct net_device *dev = dev_get_by_index(pkt_data->ifindex);
+
+	if (unlikely(!dev))
+		return -ENODEV;
+
+	if (unlikely(!is_ieee80211_device(dev, mdev))) {
+		/* foreign interface: give the reference back */
+		dev_put(dev);
+		return -ENODEV;
+	}
+
+	__ieee80211_tx_prepare(tx, skb, dev, control);
+	return 0;
+}
+
+/* Non-zero when IEEE80211_LINK_STATE_XOFF is set for @queue. */
+static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local,
+					    int queue)
+{
+	int stopped = test_bit(IEEE80211_LINK_STATE_XOFF,
+			       &local->state[queue]);
+
+	return stopped;
+}
+
+/* Non-zero when IEEE80211_LINK_STATE_PENDING is set for @queue. */
+static inline int __ieee80211_queue_pending(const struct ieee80211_local *local,
+					    int queue)
+{
+	int pending = test_bit(IEEE80211_LINK_STATE_PENDING,
+			       &local->state[queue]);
+
+	return pending;
+}
+
+#define IEEE80211_TX_OK 0 /* __ieee80211_tx(): frame and all fragments handed to the driver */
+#define IEEE80211_TX_AGAIN 1 /* __ieee80211_tx(): queue stopped or driver refused the first skb */
+#define IEEE80211_TX_FRAG_AGAIN 2 /* __ieee80211_tx(): an extra fragment could not be sent */
+
+/*
+ * Hand a prepared frame and any extra fragments to the low-level driver.
+ *
+ * @skb may be NULL when only the fragments still present in
+ * tx->u.tx.extra_frag remain to be sent.
+ *
+ * Returns IEEE80211_TX_OK when everything was passed to the driver,
+ * IEEE80211_TX_AGAIN when @skb itself could not be sent (queue 0 stopped
+ * without a qdisc installed, or the driver's tx op failed), or
+ * IEEE80211_TX_FRAG_AGAIN when an extra fragment could not be sent.
+ * Fragments accepted by the driver have their extra_frag[] slot set to
+ * NULL so that a later call skips them; the array itself is freed once
+ * all fragments have been sent.
+ */
+static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
+			  struct ieee80211_txrx_data *tx)
+{
+	struct ieee80211_tx_control *control = tx->u.tx.control;
+	int ret, i;
+
+	if (!ieee80211_qdisc_installed(local->mdev) &&
+	    __ieee80211_queue_stopped(local, 0)) {
+		netif_stop_queue(local->mdev);
+		return IEEE80211_TX_AGAIN;
+	}
+	if (skb) {
+		ieee80211_dump_frame(local->mdev->name, "TX to low-level driver", skb);
+		ret = local->ops->tx(local_to_hw(local), skb, control);
+		if (ret)
+			return IEEE80211_TX_AGAIN;
+		local->mdev->trans_start = jiffies;
+		ieee80211_led_tx(local, 1);
+	}
+	if (tx->u.tx.extra_frag) {
+		/* these flags only apply to the first fragment */
+		control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS |
+				    IEEE80211_TXCTL_USE_CTS_PROTECT |
+				    IEEE80211_TXCTL_CLEAR_DST_MASK |
+				    IEEE80211_TXCTL_FIRST_FRAGMENT);
+		for (i = 0; i < tx->u.tx.num_extra_frag; i++) {
+			/* already sent by an earlier attempt */
+			if (!tx->u.tx.extra_frag[i])
+				continue;
+			if (__ieee80211_queue_stopped(local, control->queue))
+				return IEEE80211_TX_FRAG_AGAIN;
+			/*
+			 * extra_frag[] holds num_extra_frag entries, so the
+			 * last fragment sits at index num_extra_frag - 1.
+			 * Comparing against num_extra_frag itself could never
+			 * match inside this loop, which left the last-fragment
+			 * rate and probe flag unused.
+			 */
+			if (i == tx->u.tx.num_extra_frag - 1) {
+				control->tx_rate = tx->u.tx.last_frag_hwrate;
+				control->rate = tx->u.tx.last_frag_rate;
+				if (tx->u.tx.probe_last_frag)
+					control->flags |=
+						IEEE80211_TXCTL_RATE_CTRL_PROBE;
+				else
+					control->flags &=
+						~IEEE80211_TXCTL_RATE_CTRL_PROBE;
+			}
+
+			ieee80211_dump_frame(local->mdev->name,
+					     "TX to low-level driver",
+					     tx->u.tx.extra_frag[i]);
+			ret = local->ops->tx(local_to_hw(local),
+					     tx->u.tx.extra_frag[i],
+					     control);
+			if (ret)
+				return IEEE80211_TX_FRAG_AGAIN;
+			local->mdev->trans_start = jiffies;
+			ieee80211_led_tx(local, 1);
+			tx->u.tx.extra_frag[i] = NULL;
+		}
+		kfree(tx->u.tx.extra_frag);
+		tx->u.tx.extra_frag = NULL;
+	}
+	return IEEE80211_TX_OK;
+}
+/*
+ * Run one frame through the TX handler chain and pass it to the driver.
+ *
+ * Frames shorter than 10 bytes are freed immediately. Otherwise @tx state
+ * is prepared, each entry of local->tx_handlers is called until one
+ * returns something other than TXRX_CONTINUE, and then:
+ * - TXRX_DROP: the frame and all extra fragments are freed;
+ * - TXRX_QUEUED: a handler took ownership of the frame, nothing is freed;
+ * - otherwise the Duration field of every extra fragment is filled in
+ * and __ieee80211_tx() is called. If that cannot send everything, the
+ * remaining state is saved in local->pending_packet[control->queue]
+ * and IEEE80211_LINK_STATE_PENDING is set for that queue.
+ *
+ * Returns 0 on every path visible here.
+ */
+static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
+ struct ieee80211_tx_control *control, int mgmt)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct sta_info *sta;
+ ieee80211_tx_handler *handler;
+ struct ieee80211_txrx_data tx;
+ ieee80211_txrx_result res = TXRX_DROP;
+ int ret, i;
+
+ WARN_ON(__ieee80211_queue_pending(local, control->queue));
+
+ if (unlikely(skb->len < 10)) {
+ dev_kfree_skb(skb);
+ return 0;
+ }
+
+ __ieee80211_tx_prepare(&tx, skb, dev, control);
+ sta = tx.sta; /* obtained via sta_info_get(); released with sta_info_put() below */
+ tx.u.tx.mgmt_interface = mgmt;
+ tx.u.tx.mode = local->hw.conf.mode;
+
+ for (handler = local->tx_handlers; *handler != NULL; handler++) {
+ res = (*handler)(&tx);
+ if (res != TXRX_CONTINUE)
+ break;
+ }
+
+ skb = tx.skb; /* handlers are allowed to change skb */
+
+ if (sta)
+ sta_info_put(sta);
+
+ if (unlikely(res == TXRX_DROP)) {
+ I802_DEBUG_INC(local->tx_handlers_drop);
+ goto drop;
+ }
+
+ if (unlikely(res == TXRX_QUEUED)) {
+ I802_DEBUG_INC(local->tx_handlers_queued);
+ return 0;
+ }
+
+ if (tx.u.tx.extra_frag) {
+ for (i = 0; i < tx.u.tx.num_extra_frag; i++) {
+ int next_len, dur;
+ struct ieee80211_hdr *hdr =
+ (struct ieee80211_hdr *)
+ tx.u.tx.extra_frag[i]->data;
+
+ if (i + 1 < tx.u.tx.num_extra_frag) {
+ next_len = tx.u.tx.extra_frag[i + 1]->len;
+ } else {
+ next_len = 0; /* last fragment: switch to the last-fragment rate */
+ tx.u.tx.rate = tx.u.tx.last_frag_rate;
+ tx.u.tx.last_frag_hwrate = tx.u.tx.rate->val;
+ }
+ dur = ieee80211_duration(&tx, 0, next_len);
+ hdr->duration_id = cpu_to_le16(dur);
+ }
+ }
+
+retry:
+ ret = __ieee80211_tx(local, skb, &tx);
+ if (ret) {
+ struct ieee80211_tx_stored_packet *store =
+ &local->pending_packet[control->queue];
+
+ if (ret == IEEE80211_TX_FRAG_AGAIN)
+ skb = NULL; /* first skb was already sent; only fragments remain */
+ set_bit(IEEE80211_LINK_STATE_PENDING,
+ &local->state[control->queue]);
+ smp_mb();
+ /* When the driver gets out of buffers during sending of
+ * fragments and calls ieee80211_stop_queue, there is
+ * a small window between IEEE80211_LINK_STATE_XOFF and
+ * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer
+ * gets available in that window (i.e. driver calls
+ * ieee80211_wake_queue), we would end up with ieee80211_tx
+ * called with IEEE80211_LINK_STATE_PENDING. Prevent this by
+ * continuing transmitting here when that situation is
+ * possible to have happened. */
+ if (!__ieee80211_queue_stopped(local, control->queue)) {
+ clear_bit(IEEE80211_LINK_STATE_PENDING,
+ &local->state[control->queue]);
+ goto retry;
+ }
+ memcpy(&store->control, control,
+ sizeof(struct ieee80211_tx_control)); /* save everything needed to resume this frame */
+ store->skb = skb;
+ store->extra_frag = tx.u.tx.extra_frag;
+ store->num_extra_frag = tx.u.tx.num_extra_frag;
+ store->last_frag_hwrate = tx.u.tx.last_frag_hwrate;
+ store->last_frag_rate = tx.u.tx.last_frag_rate;
+ store->last_frag_rate_ctrl_probe = tx.u.tx.probe_last_frag;
+ }
+ return 0;
+
+ drop:
+ if (skb)
+ dev_kfree_skb(skb);
+ for (i = 0; i < tx.u.tx.num_extra_frag; i++)
+ if (tx.u.tx.extra_frag[i])
+ dev_kfree_skb(tx.u.tx.extra_frag[i]);
+ kfree(tx.u.tx.extra_frag);
+ return 0;
+}
+
+/*
+ * Retry transmission of frames stored in local->pending_packet[].
+ *
+ * @data is the struct ieee80211_local pointer cast to unsigned long.
+ * Under the master device's TX lock, every queue that is not stopped and
+ * has a pending frame is handed to __ieee80211_tx() again; on success the
+ * queue's PENDING bit is cleared. Afterwards the master device queue is
+ * woken up or scheduled if any queue turned out to be free.
+ */
+static void ieee80211_tx_pending(unsigned long data)
+{
+	struct ieee80211_local *local = (struct ieee80211_local *)data;
+	struct net_device *mdev = local->mdev;
+	struct ieee80211_tx_stored_packet *pend;
+	struct ieee80211_txrx_data tx;
+	int q, ret, kick = 0;
+
+	netif_tx_lock_bh(mdev);
+	for (q = 0; q < local->hw.queues; q++) {
+		if (__ieee80211_queue_stopped(local, q))
+			continue;
+		if (!__ieee80211_queue_pending(local, q)) {
+			kick = 1;
+			continue;
+		}
+
+		/* rebuild just the parts of tx that __ieee80211_tx() uses */
+		pend = &local->pending_packet[q];
+		tx.u.tx.control = &pend->control;
+		tx.u.tx.extra_frag = pend->extra_frag;
+		tx.u.tx.num_extra_frag = pend->num_extra_frag;
+		tx.u.tx.last_frag_hwrate = pend->last_frag_hwrate;
+		tx.u.tx.last_frag_rate = pend->last_frag_rate;
+		tx.u.tx.probe_last_frag = pend->last_frag_rate_ctrl_probe;
+
+		ret = __ieee80211_tx(local, pend->skb, &tx);
+		if (ret == IEEE80211_TX_OK) {
+			clear_bit(IEEE80211_LINK_STATE_PENDING,
+				  &local->state[q]);
+			kick = 1;
+		} else if (ret == IEEE80211_TX_FRAG_AGAIN) {
+			/* the first skb went out; only fragments remain */
+			pend->skb = NULL;
+		}
+	}
+	netif_tx_unlock_bh(mdev);
+
+	if (!kick)
+		return;
+
+	if (ieee80211_qdisc_installed(mdev)) {
+		netif_schedule(mdev);
+		return;
+	}
+	if (!__ieee80211_queue_stopped(local, 0))
+		netif_wake_queue(mdev);
+}
+
+/*
+ * Discard every stored pending frame: for each queue with the PENDING bit
+ * set, free the stored skb, all stored extra fragments and the fragment
+ * array, then clear the bit.
+ */
+static void ieee80211_clear_tx_pending(struct ieee80211_local *local)
+{
+	struct ieee80211_tx_stored_packet *pend;
+	int q, f;
+
+	for (q = 0; q < local->hw.queues; q++) {
+		if (__ieee80211_queue_pending(local, q)) {
+			pend = &local->pending_packet[q];
+
+			kfree_skb(pend->skb);
+			for (f = 0; f < pend->num_extra_frag; f++)
+				kfree_skb(pend->extra_frag[f]);
+			kfree(pend->extra_frag);
+
+			clear_bit(IEEE80211_LINK_STATE_PENDING,
+				  &local->state[q]);
+		}
+	}
+}
+
+/*
+ * start_xmit handler of the master device.
+ *
+ * Looks up the originating interface from the ifindex stored in skb->cb,
+ * makes sure the skb has enough headroom for the driver and encryption,
+ * translates the per-packet data in skb->cb into a struct
+ * ieee80211_tx_control and passes the frame to ieee80211_tx().
+ *
+ * The reference taken on the originating device by dev_get_by_index() is
+ * dropped on every path out of this function. Frames whose originating
+ * device cannot be found, or whose headroom cannot be expanded, are freed
+ * and 0 is returned.
+ */
+static int ieee80211_master_start_xmit(struct sk_buff *skb,
+				       struct net_device *dev)
+{
+	struct ieee80211_tx_control control;
+	struct ieee80211_tx_packet_data *pkt_data;
+	struct net_device *odev = NULL;
+	struct ieee80211_sub_if_data *osdata;
+	int headroom;
+	int ret;
+
+	/*
+	 * copy control out of the skb so other people can use skb->cb
+	 */
+	pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
+	memset(&control, 0, sizeof(struct ieee80211_tx_control));
+
+	if (pkt_data->ifindex)
+		odev = dev_get_by_index(pkt_data->ifindex);
+	if (unlikely(odev && !is_ieee80211_device(odev, dev))) {
+		dev_put(odev);
+		odev = NULL;
+	}
+	if (unlikely(!odev)) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+		printk(KERN_DEBUG "%s: Discarded packet with nonexistent "
+		       "originating device\n", dev->name);
+#endif
+		dev_kfree_skb(skb);
+		return 0;
+	}
+	osdata = IEEE80211_DEV_TO_SUB_IF(odev);
+
+	headroom = osdata->local->hw.extra_tx_headroom +
+		IEEE80211_ENCRYPT_HEADROOM;
+	if (skb_headroom(skb) < headroom) {
+		if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) {
+			dev_kfree_skb(skb);
+			/* release the reference held on odev; returning
+			 * without this leaked it on allocation failure */
+			dev_put(odev);
+			return 0;
+		}
+	}
+
+	control.ifindex = odev->ifindex;
+	control.type = osdata->type;
+	if (pkt_data->req_tx_status)
+		control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS;
+	if (pkt_data->do_not_encrypt)
+		control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
+	if (pkt_data->requeue)
+		control.flags |= IEEE80211_TXCTL_REQUEUE;
+	control.queue = pkt_data->queue;
+
+	ret = ieee80211_tx(odev, skb, &control,
+			   control.type == IEEE80211_IF_TYPE_MGMT);
+	dev_put(odev);
+
+	return ret;
+}
+
+
+/**
+ * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type
+ * subinterfaces (wlan#, WDS, and VLAN interfaces)
+ * @skb: packet to be sent
+ * @dev: incoming interface
+ *
+ * Returns: 0 on success (and frees skb in this case) or 1 on failure (skb will
+ * not be freed, and caller is responsible for either retrying later or freeing
+ * skb).
+ *
+ * In the code below, 1 is returned only when pskb_expand_head() fails; a
+ * too-short skb and an unsupported interface type both free the skb and
+ * return 0.
+ *
+ * This function takes in an Ethernet header and encapsulates it with suitable
+ * IEEE 802.11 header based on which interface the packet is coming in. The
+ * encapsulated packet will then be passed to master interface, wlan#.11, for
+ * transmission (through low-level driver).
+ */
+static int ieee80211_subif_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_tx_packet_data *pkt_data;
+ struct ieee80211_sub_if_data *sdata;
+ int ret = 1, head_need;
+ u16 ethertype, hdrlen, fc;
+ struct ieee80211_hdr hdr;
+ const u8 *encaps_data;
+ int encaps_len, skip_header_bytes;
+ int nh_pos, h_pos, no_encrypt = 0; /* no_encrypt is never changed in this function */
+ struct sta_info *sta;
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ if (unlikely(skb->len < ETH_HLEN)) {
+ printk(KERN_DEBUG "%s: short skb (len=%d)\n",
+ dev->name, skb->len);
+ ret = 0;
+ goto fail;
+ }
+
+ nh_pos = skb_network_header(skb) - skb->data;
+ h_pos = skb_transport_header(skb) - skb->data;
+
+ /* convert Ethernet header to proper 802.11 header (based on
+ * operation mode); skb->data[0..5] is the Ethernet destination,
+ * [6..11] the source and [12..13] the ethertype */
+ ethertype = (skb->data[12] << 8) | skb->data[13];
+ /* TODO: handling for 802.1x authorized/unauthorized port */
+ fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA;
+
+ if (likely(sdata->type == IEEE80211_IF_TYPE_AP ||
+ sdata->type == IEEE80211_IF_TYPE_VLAN)) {
+ fc |= IEEE80211_FCTL_FROMDS;
+ /* DA BSSID SA */
+ memcpy(hdr.addr1, skb->data, ETH_ALEN);
+ memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN);
+ memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
+ hdrlen = 24;
+ } else if (sdata->type == IEEE80211_IF_TYPE_WDS) {
+ fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS;
+ /* RA TA DA SA */
+ memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN);
+ memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN);
+ memcpy(hdr.addr3, skb->data, ETH_ALEN);
+ memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
+ hdrlen = 30;
+ } else if (sdata->type == IEEE80211_IF_TYPE_STA) {
+ fc |= IEEE80211_FCTL_TODS;
+ /* BSSID SA DA */
+ memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN);
+ memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
+ memcpy(hdr.addr3, skb->data, ETH_ALEN);
+ hdrlen = 24;
+ } else if (sdata->type == IEEE80211_IF_TYPE_IBSS) {
+ /* DA SA BSSID */
+ memcpy(hdr.addr1, skb->data, ETH_ALEN);
+ memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
+ memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN);
+ hdrlen = 24;
+ } else {
+ ret = 0;
+ goto fail;
+ }
+
+ /* receiver is QoS enabled, use a QoS type frame
+ *
+ * hdrlen grows by 2 here, but nothing in this function writes those
+ * two extra bytes of hdr before hdrlen bytes are copied into the skb
+ * further down.
+ * NOTE(review): with WDS + WME hdrlen becomes 32 - confirm that
+ * sizeof(struct ieee80211_hdr) covers this, and that the QoS control
+ * field is filled in later on the TX path. */
+ sta = sta_info_get(local, hdr.addr1);
+ if (sta) {
+ if (sta->flags & WLAN_STA_WME) {
+ fc |= IEEE80211_STYPE_QOS_DATA;
+ hdrlen += 2;
+ }
+ sta_info_put(sta);
+ }
+
+ hdr.frame_control = cpu_to_le16(fc);
+ hdr.duration_id = 0;
+ hdr.seq_ctrl = 0;
+
+ skip_header_bytes = ETH_HLEN; /* bytes of the Ethernet header to strip */
+ if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) {
+ encaps_data = bridge_tunnel_header;
+ encaps_len = sizeof(bridge_tunnel_header);
+ skip_header_bytes -= 2; /* keep the 2-byte ethertype after the encapsulation header */
+ } else if (ethertype >= 0x600) {
+ encaps_data = rfc1042_header;
+ encaps_len = sizeof(rfc1042_header);
+ skip_header_bytes -= 2; /* keep the 2-byte ethertype after the encapsulation header */
+ } else {
+ encaps_data = NULL;
+ encaps_len = 0;
+ }
+
+ skb_pull(skb, skip_header_bytes);
+ nh_pos -= skip_header_bytes;
+ h_pos -= skip_header_bytes;
+
+ /* TODO: implement support for fragments so that there is no need to
+ * reallocate and copy payload; it might be enough to support one
+ * extra fragment that would be copied in the beginning of the frame
+ * data.. anyway, it would be nice to include this into skb structure
+ * somehow
+ *
+ * There are few options for this:
+ * use skb->cb as an extra space for 802.11 header
+ * allocate new buffer if not enough headroom
+ * make sure that there is enough headroom in every skb by increasing
+ * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and
+ * alloc_skb() (net/core/skbuff.c)
+ */
+ head_need = hdrlen + encaps_len + local->hw.extra_tx_headroom;
+ head_need -= skb_headroom(skb);
+
+ /* We are going to modify skb data, so make a copy of it if happens to
+ * be cloned. This could happen, e.g., with Linux bridge code passing
+ * us broadcast frames. */
+
+ if (head_need > 0 || skb_cloned(skb)) {
+#if 0
+ printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes "
+ "of headroom\n", dev->name, head_need);
+#endif
+
+ if (skb_cloned(skb))
+ I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
+ else
+ I802_DEBUG_INC(local->tx_expand_skb_head);
+ /* Since we have to reallocate the buffer, make sure that there
+ * is enough room for possible WEP IV/ICV and TKIP (8 bytes
+ * before payload and 12 after). On failure ret is still 1, so
+ * the skb is not freed at the fail label. */
+ if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8),
+ 12, GFP_ATOMIC)) {
+ printk(KERN_DEBUG "%s: failed to reallocate TX buffer"
+ "\n", dev->name);
+ goto fail;
+ }
+ }
+
+ if (encaps_data) {
+ memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
+ nh_pos += encaps_len;
+ h_pos += encaps_len;
+ }
+ memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
+ nh_pos += hdrlen;
+ h_pos += hdrlen;
+
+ pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
+ memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
+ pkt_data->ifindex = sdata->dev->ifindex;
+ pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT);
+ pkt_data->do_not_encrypt = no_encrypt;
+
+ skb->dev = local->mdev;
+ sdata->stats.tx_packets++;
+ sdata->stats.tx_bytes += skb->len;
+
+ /* Update skb pointers to various headers since this modified frame
+ * is going to go through Linux networking code that may potentially
+ * need things like pointer to IP header. */
+ skb_set_mac_header(skb, 0);
+ skb_set_network_header(skb, nh_pos);
+ skb_set_transport_header(skb, h_pos);
+
+ dev->trans_start = jiffies;
+ dev_queue_xmit(skb);
+
+ return 0;
+
+ fail:
+ if (!ret)
+ dev_kfree_skb(skb);
+
+ return ret;
+}
+
+
+/*
+ * This is the transmit routine for the 802.11 type interfaces
+ * called by upper layers of the linux networking
+ * stack when it has a frame to transmit.
+ *
+ * The skb already carries an 802.11 header. The per-packet data in
+ * skb->cb is initialised, the frame is redirected to the master device
+ * and queued with dev_queue_xmit(). Frames shorter than 10 bytes, or
+ * frames whose headroom cannot be expanded, are freed. Always returns 0.
+ */
+static int
+ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_tx_packet_data *pkt_data;
+	struct ieee80211_hdr *hdr;
+	u16 frame_ctl;
+
+	if (skb->len < 10) {
+		dev_kfree_skb(skb);
+		return 0;
+	}
+
+	if (skb_headroom(skb) < sdata->local->hw.extra_tx_headroom &&
+	    pskb_expand_head(skb, sdata->local->hw.extra_tx_headroom, 0,
+			     GFP_ATOMIC)) {
+		dev_kfree_skb(skb);
+		return 0;
+	}
+
+	/* read the header only now: expanding the head may move skb->data */
+	hdr = (struct ieee80211_hdr *) skb->data;
+	frame_ctl = le16_to_cpu(hdr->frame_control);
+
+	pkt_data = (struct ieee80211_tx_packet_data *) skb->cb;
+	memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
+	pkt_data->ifindex = sdata->dev->ifindex;
+	pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT);
+
+	skb->priority = 20; /* use hardcoded priority for mgmt TX queue */
+	skb->dev = sdata->local->mdev;
+
+	/*
+	 * We're using the protocol field of the frame control header
+	 * to request TX callback for hostapd. BIT(1) is checked, and
+	 * cleared again before the frame goes out.
+	 */
+	if (frame_ctl & BIT(1)) {
+		pkt_data->req_tx_status = 1;
+		frame_ctl &= ~BIT(1);
+		hdr->frame_control = cpu_to_le16(frame_ctl);
+	}
+
+	pkt_data->do_not_encrypt = !(frame_ctl & IEEE80211_FCTL_PROTECTED);
+
+	sdata->stats.tx_packets++;
+	sdata->stats.tx_bytes += skb->len;
+
+	dev_queue_xmit(skb);
+
+	return 0;
+}
+
+
+/*
+ * Append a TIM information element to the beacon in @skb.
+ *
+ * Advances the DTIM count of @bss, then writes element ID, length, DTIM
+ * count, DTIM period, bitmap control and the partial virtual bitmap.
+ * Without any TIM bits set (or without stations in power save) the
+ * element is 6 bytes long with a single zero bitmap byte. The whole
+ * operation runs under local->sta_lock.
+ */
+static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
+				     struct ieee80211_if_ap *bss,
+				     struct sk_buff *skb)
+{
+	u8 *elem;
+	int bc_buffered = 0;
+	int idx, have_bits = 0, first, last;
+
+	spin_lock_bh(&local->sta_lock);
+
+	/* Generate bitmap for TIM only if there are any STAs in power save
+	 * mode. */
+	if (atomic_read(&bss->num_sta_ps) > 0) {
+		/* in the hope that this is faster than
+		 * checking byte-for-byte */
+		have_bits = !bitmap_empty((unsigned long*)bss->tim,
+					  IEEE80211_MAX_AID+1);
+	}
+
+	/* count down to the next DTIM, wrapping to dtim_period - 1 */
+	if (bss->dtim_count != 0)
+		bss->dtim_count--;
+	else
+		bss->dtim_count = bss->dtim_period - 1;
+
+	elem = (u8 *) skb_put(skb, 6);
+	elem[0] = WLAN_EID_TIM;
+	elem[1] = 4;
+	elem[2] = bss->dtim_count;
+	elem[3] = bss->dtim_period;
+
+	/* bit 0 of bitmap control: buffered broadcast frames at DTIM */
+	if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf))
+		bc_buffered = 1;
+
+	if (!have_bits) {
+		elem[4] = bc_buffered; /* Bitmap control */
+		elem[5] = 0; /* Part Virt Bitmap */
+		spin_unlock_bh(&local->sta_lock);
+		return;
+	}
+
+	/* Find largest even number N1 so that bits numbered 1 through
+	 * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits
+	 * (N2 + 1) x 8 through 2007 are 0. */
+	first = 0;
+	for (idx = 0; idx < IEEE80211_MAX_TIM_LEN; idx++) {
+		if (bss->tim[idx]) {
+			first = idx & 0xfe;
+			break;
+		}
+	}
+	last = first;
+	for (idx = IEEE80211_MAX_TIM_LEN - 1; idx >= first; idx--) {
+		if (bss->tim[idx]) {
+			last = idx;
+			break;
+		}
+	}
+
+	/* Bitmap control */
+	elem[4] = first | bc_buffered;
+	/* Part Virt Bitmap */
+	memcpy(elem + 5, bss->tim + first, last - first + 1);
+
+	/* one bitmap byte is already part of the 6 bytes reserved above */
+	elem[1] = last - first + 4;
+	skb_put(skb, last - first);
+
+	spin_unlock_bh(&local->sta_lock);
+}
+
+
+/*
+ * ieee80211_beacon_get - build a beacon frame for an AP interface
+ * @hw: hardware the beacon is generated for
+ * @if_id: ifindex of the AP interface
+ * @control: optional TX control block; when non-NULL it is filled with the
+ *	rate, antenna, power level and flags to use for the beacon
+ *
+ * The frame is assembled from the interface's beacon head, a freshly
+ * generated TIM element and the optional beacon tail.
+ *
+ * Returns the new skb, or NULL when @if_id does not name an AP interface
+ * with beacon data, when the skb allocation fails, or when no TX rate can
+ * be found.
+ *
+ * The reference taken by dev_get_by_index() is held until the function
+ * returns, because bdev, sdata and ap are all used after the lookup.
+ */
+struct sk_buff * ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
+				      struct ieee80211_tx_control *control)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct sk_buff *skb = NULL;
+	struct net_device *bdev;
+	struct ieee80211_sub_if_data *sdata = NULL;
+	struct ieee80211_if_ap *ap = NULL;
+	struct ieee80211_rate *rate;
+	struct rate_control_extra extra;
+	u8 *b_head, *b_tail;
+	int bh_len, bt_len;
+
+	bdev = dev_get_by_index(if_id);
+	if (bdev) {
+		sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
+		ap = &sdata->u.ap;
+	}
+
+	if (!ap || sdata->type != IEEE80211_IF_TYPE_AP ||
+	    !ap->beacon_head) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+		if (net_ratelimit())
+			printk(KERN_DEBUG "no beacon data avail for idx=%d "
+			       "(%s)\n", if_id, bdev ? bdev->name : "N/A");
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+		goto out;
+	}
+
+	/* Assume we are generating the normal beacon locally */
+	b_head = ap->beacon_head;
+	b_tail = ap->beacon_tail;
+	bh_len = ap->beacon_head_len;
+	bt_len = ap->beacon_tail_len;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+		bh_len + bt_len + 256 /* maximum TIM len */);
+	if (!skb)
+		goto out;
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	memcpy(skb_put(skb, bh_len), b_head, bh_len);
+
+	ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data);
+
+	ieee80211_beacon_add_tim(local, ap, skb);
+
+	if (b_tail) {
+		memcpy(skb_put(skb, bt_len), b_tail, bt_len);
+	}
+
+	if (control) {
+		memset(&extra, 0, sizeof(extra));
+		extra.mode = local->oper_hw_mode;
+
+		rate = rate_control_get_rate(local, local->mdev, skb, &extra);
+		if (!rate) {
+			if (net_ratelimit()) {
+				printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate "
+				       "found\n", local->mdev->name);
+			}
+			dev_kfree_skb(skb);
+			skb = NULL;
+			goto out;
+		}
+
+		/* use the short-preamble rate value when both sides allow it */
+		control->tx_rate = (local->short_preamble &&
+				    (rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
+			rate->val2 : rate->val;
+		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
+		control->power_level = local->hw.conf.power_level;
+		control->flags |= IEEE80211_TXCTL_NO_ACK;
+		control->retry_limit = 1;
+		control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK;
+	}
+
+	ap->num_beacons++;
+
+out:
+	/* drop the lookup reference only once bdev/sdata/ap are no longer used */
+	if (bdev)
+		dev_put(bdev);
+	return skb;
+}
+EXPORT_SYMBOL(ieee80211_beacon_get);
+
+/*
+ * Duration field value for an RTS frame protecting a frame of @frame_len
+ * bytes: the sum of the CTS, data frame and ACK durations, all computed at
+ * the RTS rate from @frame_txctl. Returned in little-endian byte order.
+ */
+__le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
+			      size_t frame_len,
+			      const struct ieee80211_tx_control *frame_txctl)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_rate *rts_rate = frame_txctl->rts_rate;
+	int use_short = local->short_preamble;
+	int is_erp = (rts_rate->flags & IEEE80211_RATE_ERP) ? 1 : 0;
+	u16 total;
+
+	/* CTS response */
+	total = ieee80211_frame_duration(local, 10, rts_rate->rate,
+					 is_erp, use_short);
+	/* the protected data frame */
+	total += ieee80211_frame_duration(local, frame_len, rts_rate->rate,
+					  is_erp, use_short);
+	/* the closing ACK */
+	total += ieee80211_frame_duration(local, 10, rts_rate->rate,
+					  is_erp, use_short);
+
+	return cpu_to_le16(total);
+}
+EXPORT_SYMBOL(ieee80211_rts_duration);
+
+
+/*
+ * Duration field value for a CTS-to-self frame protecting a frame of
+ * @frame_len bytes: the data frame duration, plus the ACK duration unless
+ * IEEE80211_TXCTL_NO_ACK is set in @frame_txctl. Returned little-endian.
+ */
+__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
+				    size_t frame_len,
+				    const struct ieee80211_tx_control *frame_txctl)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_rate *rts_rate = frame_txctl->rts_rate;
+	int use_short = local->short_preamble;
+	int is_erp = (rts_rate->flags & IEEE80211_RATE_ERP) ? 1 : 0;
+	u16 total;
+
+	/* the protected data frame */
+	total = ieee80211_frame_duration(local, frame_len, rts_rate->rate,
+					 is_erp, use_short);
+
+	/* an ACK follows only when one is expected */
+	if ((frame_txctl->flags & IEEE80211_TXCTL_NO_ACK) == 0)
+		total += ieee80211_frame_duration(local, 10, rts_rate->rate,
+						  is_erp, use_short);
+
+	return cpu_to_le16(total);
+}
+EXPORT_SYMBOL(ieee80211_ctstoself_duration);
+
+/*
+ * Fill @rts with an RTS frame for the 802.11 frame at @frame: control
+ * type/subtype, duration from ieee80211_rts_duration(), receiver address
+ * copied from addr1 and transmitter address copied from addr2.
+ */
+void ieee80211_rts_get(struct ieee80211_hw *hw,
+		       const void *frame, size_t frame_len,
+		       const struct ieee80211_tx_control *frame_txctl,
+		       struct ieee80211_rts *rts)
+{
+	const struct ieee80211_hdr *wh = frame;
+	const u16 rts_fc = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS;
+
+	rts->frame_control = cpu_to_le16(rts_fc);
+	rts->duration = ieee80211_rts_duration(hw, frame_len, frame_txctl);
+	memcpy(rts->ra, wh->addr1, sizeof(rts->ra));
+	memcpy(rts->ta, wh->addr2, sizeof(rts->ta));
+}
+EXPORT_SYMBOL(ieee80211_rts_get);
+
+/*
+ * Fill @cts with a CTS-to-self frame for the 802.11 frame at @frame:
+ * control type/subtype, duration from ieee80211_ctstoself_duration() and
+ * receiver address copied from the frame's addr1.
+ */
+void ieee80211_ctstoself_get(struct ieee80211_hw *hw,
+			     const void *frame, size_t frame_len,
+			     const struct ieee80211_tx_control *frame_txctl,
+			     struct ieee80211_cts *cts)
+{
+	const struct ieee80211_hdr *wh = frame;
+	const u16 cts_fc = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS;
+
+	cts->frame_control = cpu_to_le16(cts_fc);
+	cts->duration = ieee80211_ctstoself_duration(hw, frame_len, frame_txctl);
+	memcpy(cts->ra, wh->addr1, sizeof(cts->ra));
+}
+EXPORT_SYMBOL(ieee80211_ctstoself_get);
+
+/*
+ * Fetch the next buffered broadcast/multicast frame of an AP interface and
+ * run it through the TX handlers.
+ *
+ * @hw: hardware the frame is fetched for
+ * @if_id: ifindex of the AP interface
+ * @control: TX control block; zeroed here and then passed to
+ *	ieee80211_tx_prepare()
+ *
+ * Returns the frame to transmit, or NULL when @if_id is not an AP
+ * interface with beacon data, when the DTIM count is non-zero, when the
+ * buffer queue is empty, or when a TX handler dropped or queued the frame.
+ */
+struct sk_buff *
+ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id,
+ struct ieee80211_tx_control *control)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct sk_buff *skb;
+ struct sta_info *sta;
+ ieee80211_tx_handler *handler;
+ struct ieee80211_txrx_data tx;
+ ieee80211_txrx_result res = TXRX_DROP;
+ struct net_device *bdev;
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_if_ap *bss = NULL;
+
+ bdev = dev_get_by_index(if_id);
+ if (bdev) {
+ sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
+ bss = &sdata->u.ap;
+ dev_put(bdev);
+ }
+ /* sdata is only read when bss is non-NULL, i.e. when it was set above */
+ if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head)
+ return NULL;
+
+ if (bss->dtim_count != 0)
+ return NULL; /* send buffered bc/mc only after DTIM beacon */
+ memset(control, 0, sizeof(*control));
+ /* Skip (and free) frames that ieee80211_tx_prepare() rejects. */
+ while (1) {
+ skb = skb_dequeue(&bss->ps_bc_buf);
+ if (!skb)
+ return NULL;
+ local->total_ps_buffered--;
+
+ if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) {
+ struct ieee80211_hdr *hdr =
+ (struct ieee80211_hdr *) skb->data;
+ /* more buffered multicast/broadcast frames ==> set
+ * MoreData flag in IEEE 802.11 header to inform PS
+ * STAs */
+ hdr->frame_control |=
+ cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+ }
+
+ if (ieee80211_tx_prepare(&tx, skb, local->mdev, control) == 0)
+ break;
+ dev_kfree_skb_any(skb);
+ }
+ sta = tx.sta;
+ tx.u.tx.ps_buffered = 1;
+
+ /* Run the handler chain until one of them drops or queues the frame. */
+ for (handler = local->tx_handlers; *handler != NULL; handler++) {
+ res = (*handler)(&tx);
+ if (res == TXRX_DROP || res == TXRX_QUEUED)
+ break;
+ }
+ /* NOTE(review): presumably releases a reference taken by
+ * ieee80211_tx_prepare() - confirm against that function */
+ dev_put(tx.dev);
+ skb = tx.skb; /* handlers are allowed to change skb */
+
+ if (res == TXRX_DROP) {
+ I802_DEBUG_INC(local->tx_handlers_drop);
+ dev_kfree_skb(skb);
+ skb = NULL;
+ } else if (res == TXRX_QUEUED) {
+ I802_DEBUG_INC(local->tx_handlers_queued);
+ skb = NULL;
+ }
+
+ if (sta)
+ sta_info_put(sta);
+
+ return skb;
+}
+EXPORT_SYMBOL(ieee80211_get_buffered_bc);
+
+/*
+ * Build an ieee80211_if_conf for @dev from its sub-interface data and pass
+ * it to the driver's config_interface callback.
+ *
+ * STA/IBSS interfaces get BSSID (the broadcast address while @dev is the
+ * scanning device), SSID and extra IEs; AP interfaces get SSID, generic
+ * elements and the optional @beacon / @control pair.
+ *
+ * Returns 0 when the driver has no callback or @dev is not running,
+ * otherwise the callback's return value.
+ */
+static int __ieee80211_if_config(struct net_device *dev,
+				 struct sk_buff *beacon,
+				 struct ieee80211_tx_control *control)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_if_conf conf;
+	static u8 scan_bssid[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+	if (!local->ops->config_interface)
+		return 0;
+	if (!netif_running(dev))
+		return 0;
+
+	memset(&conf, 0, sizeof(conf));
+	conf.type = sdata->type;
+
+	switch (sdata->type) {
+	case IEEE80211_IF_TYPE_STA:
+	case IEEE80211_IF_TYPE_IBSS:
+		conf.bssid = sdata->u.sta.bssid;
+		if (local->sta_scanning && local->scan_dev == dev)
+			conf.bssid = scan_bssid;
+		conf.ssid = sdata->u.sta.ssid;
+		conf.ssid_len = sdata->u.sta.ssid_len;
+		conf.generic_elem = sdata->u.sta.extra_ie;
+		conf.generic_elem_len = sdata->u.sta.extra_ie_len;
+		break;
+	case IEEE80211_IF_TYPE_AP:
+		conf.ssid = sdata->u.ap.ssid;
+		conf.ssid_len = sdata->u.ap.ssid_len;
+		conf.generic_elem = sdata->u.ap.generic_elem;
+		conf.generic_elem_len = sdata->u.ap.generic_elem_len;
+		conf.beacon = beacon;
+		conf.beacon_control = control;
+		break;
+	default:
+		/* other interface types carry only the type field */
+		break;
+	}
+
+	return local->ops->config_interface(local_to_hw(local),
+					    dev->ifindex, &conf);
+}
+
+/*
+ * Push the interface configuration of @dev to the driver without a beacon
+ * template. Returns the result of __ieee80211_if_config().
+ */
+int ieee80211_if_config(struct net_device *dev)
+{
+ return __ieee80211_if_config(dev, NULL, NULL);
+}
+
+/*
+ * Generate a beacon for @dev and hand it to the driver through the
+ * interface configuration, but only when the hardware sets
+ * IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE.
+ *
+ * Returns 0 when the flag is not set, -ENOMEM when no beacon could be
+ * built, otherwise the result of __ieee80211_if_config().
+ */
+int ieee80211_if_config_beacon(struct net_device *dev)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_tx_control control;
+	struct sk_buff *beacon;
+
+	if (local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE) {
+		beacon = ieee80211_beacon_get(local_to_hw(local),
+					      dev->ifindex, &control);
+		if (beacon == NULL)
+			return -ENOMEM;
+		return __ieee80211_if_config(dev, beacon, &control);
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the current channel and mode (the scan channel/mode while scanning,
+ * the operating ones otherwise) into local->hw.conf and pass the result to
+ * the driver's config callback.
+ *
+ * Returns 0 when the driver has no config callback, otherwise the
+ * callback's return value.
+ */
+int ieee80211_hw_config(struct ieee80211_local *local)
+{
+	struct ieee80211_hw_mode *mode = local->oper_hw_mode;
+	struct ieee80211_channel *chan = local->oper_channel;
+
+	if (local->sta_scanning) {
+		chan = local->scan_channel;
+		mode = local->scan_hw_mode;
+	}
+
+	/* pointers to the selected channel and mode */
+	local->hw.conf.chan = chan;
+	local->hw.conf.mode = mode;
+
+	/* values copied out of the channel and mode */
+	local->hw.conf.channel = chan->chan;
+	local->hw.conf.channel_val = chan->val;
+	local->hw.conf.freq = chan->freq;
+	local->hw.conf.power_level = chan->power_level;
+	local->hw.conf.antenna_max = chan->antenna_max;
+	local->hw.conf.phymode = mode->mode;
+
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+	printk(KERN_DEBUG "HW CONFIG: channel=%d freq=%d "
+	       "phymode=%d\n", local->hw.conf.channel, local->hw.conf.freq,
+	       local->hw.conf.phymode);
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+
+	if (!local->ops->config)
+		return 0;
+
+	return local->ops->config(local_to_hw(local), &local->hw.conf);
+}
+
+
+/*
+ * Set the MTU of an interface that uses 802.3 frames.
+ *
+ * Accepts values from 256 up to IEEE80211_MAX_DATA_LEN - 24 - 6; anything
+ * else is logged and rejected with -EINVAL. Returns 0 on success.
+ */
+static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
+{
+	/* FIX: what would be proper limits for MTU?
+	 * This interface uses 802.3 frames. */
+	if (new_mtu >= 256 && new_mtu <= IEEE80211_MAX_DATA_LEN - 24 - 6) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+		printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu);
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+		dev->mtu = new_mtu;
+		return 0;
+	}
+
+	printk(KERN_WARNING "%s: invalid MTU %d\n",
+	       dev->name, new_mtu);
+	return -EINVAL;
+}
+
+
+/*
+ * Set the MTU of an interface that uses 802.11 frames.
+ *
+ * Accepts values from 256 up to IEEE80211_MAX_DATA_LEN; anything else is
+ * logged and rejected with -EINVAL. Returns 0 on success.
+ */
+static int ieee80211_change_mtu_apdev(struct net_device *dev, int new_mtu)
+{
+	/* FIX: what would be proper limits for MTU?
+	 * This interface uses 802.11 frames. */
+	if (new_mtu >= 256 && new_mtu <= IEEE80211_MAX_DATA_LEN) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+		printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu);
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+		dev->mtu = new_mtu;
+		return 0;
+	}
+
+	printk(KERN_WARNING "%s: invalid MTU %d\n",
+	       dev->name, new_mtu);
+	return -EINVAL;
+}
+
+/*
+ * Subclass values passed to netif_tx_lock_nested(); TX_LOCK_MASTER is used
+ * when taking the master device's TX lock.
+ */
+enum netif_tx_lock_class {
+ TX_LOCK_NORMAL,
+ TX_LOCK_MASTER,
+};
+
+/*
+ * Take @dev's transmit lock via spin_lock_nested() with the given
+ * @subclass and record the current CPU as the lock owner.
+ */
+static inline void netif_tx_lock_nested(struct net_device *dev, int subclass)
+{
+ spin_lock_nested(&dev->_xmit_lock, subclass);
+ dev->xmit_lock_owner = smp_processor_id();
+}
+
+/*
+ * Fold @dev's allmulti/promisc state and multicast count into the
+ * hardware-wide counters in local, then call the driver's
+ * set_multicast_list callback (when present) with the aggregated flags and
+ * count. Runs with the master device's TX lock held.
+ */
+static void ieee80211_set_multicast_list(struct net_device *dev)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ unsigned short flags;
+
+ netif_tx_lock_nested(local->mdev, TX_LOCK_MASTER);
+ /* Update the counter only when the device flag and the cached
+ * per-interface state disagree. */
+ if (((dev->flags & IFF_ALLMULTI) != 0) ^ (sdata->allmulti != 0)) {
+ if (sdata->allmulti) {
+ sdata->allmulti = 0;
+ local->iff_allmultis--;
+ } else {
+ sdata->allmulti = 1;
+ local->iff_allmultis++;
+ }
+ }
+ if (((dev->flags & IFF_PROMISC) != 0) ^ (sdata->promisc != 0)) {
+ if (sdata->promisc) {
+ sdata->promisc = 0;
+ local->iff_promiscs--;
+ } else {
+ sdata->promisc = 1;
+ local->iff_promiscs++;
+ }
+ }
+ /* Replace this interface's old contribution to the total count. */
+ if (dev->mc_count != sdata->mc_count) {
+ local->mc_count = local->mc_count - sdata->mc_count +
+ dev->mc_count;
+ sdata->mc_count = dev->mc_count;
+ }
+ if (local->ops->set_multicast_list) {
+ /* Start from the master device flags and add what any
+ * interface requested. */
+ flags = local->mdev->flags;
+ if (local->iff_allmultis)
+ flags |= IFF_ALLMULTI;
+ if (local->iff_promiscs)
+ flags |= IFF_PROMISC;
+ read_lock(&local->sub_if_lock);
+ local->ops->set_multicast_list(local_to_hw(local), flags,
+ local->mc_count);
+ read_unlock(&local->sub_if_lock);
+ }
+ netif_tx_unlock(local->mdev);
+}
+
+/*
+ * ieee80211_get_mc_list_item - iterate over the multicast addresses of all
+ * sub-interfaces of @hw
+ * @hw: hardware whose sub-interfaces are walked
+ * @prev: entry returned by the previous call, or NULL to start
+ * @ptr: iteration cursor owned by the caller; must point to NULL when
+ *	@prev is NULL (a warning is emitted otherwise)
+ *
+ * Returns the next multicast entry, or NULL once every sub-interface has
+ * been visited.
+ *
+ * Sub-interfaces with an empty multicast list are skipped. Returning NULL
+ * for them would be indistinguishable from the end of the iteration and
+ * would hide the lists of all later sub-interfaces from the caller.
+ */
+struct dev_mc_list *ieee80211_get_mc_list_item(struct ieee80211_hw *hw,
+					       struct dev_mc_list *prev,
+					       void **ptr)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_sub_if_data *sdata = *ptr;
+	struct dev_mc_list *mc = NULL;
+
+	if (!prev) {
+		WARN_ON(sdata);
+		sdata = NULL;
+	}
+
+	if (prev && prev->next) {
+		/* still inside the current interface's list */
+		mc = prev->next;
+	} else {
+		/* advance to the next interface that has any entries */
+		do {
+			if (sdata)
+				sdata = list_entry(sdata->list.next,
+						   struct ieee80211_sub_if_data, list);
+			else
+				sdata = list_entry(local->sub_if_list.next,
+						   struct ieee80211_sub_if_data, list);
+			if (&sdata->list == &local->sub_if_list)
+				break;	/* wrapped around: iteration is over */
+			mc = sdata->dev->mc_list;
+		} while (!mc);
+	}
+
+	*ptr = sdata;
+	return mc;
+}
+EXPORT_SYMBOL(ieee80211_get_mc_list_item);
+
+/* Return the statistics block kept in @dev's sub-interface data. */
+static struct net_device_stats *ieee80211_get_stats(struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sub_if = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	return &sub_if->stats;
+}
+
+/*
+ * Quiesce the type-specific state of @dev. Must be called with the RTNL
+ * held (asserted below).
+ *
+ * For STA and IBSS interfaces: mark the station state disabled, stop its
+ * timer, drop queued frames, cancel a software scan owned by this
+ * interface and flush the hardware workqueue. Other types need nothing.
+ */
+static void ieee80211_if_shutdown(struct net_device *dev)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ ASSERT_RTNL();
+ switch (sdata->type) {
+ case IEEE80211_IF_TYPE_STA:
+ case IEEE80211_IF_TYPE_IBSS:
+ sdata->u.sta.state = IEEE80211_DISABLED;
+ del_timer_sync(&sdata->u.sta.timer);
+ skb_queue_purge(&sdata->u.sta.skb_queue);
+ /* Only cancel a scan that is done in software (no hw_scan op)
+ * and was started on this interface. */
+ if (!local->ops->hw_scan &&
+ local->scan_dev == sdata->dev) {
+ local->sta_scanning = 0;
+ cancel_delayed_work(&local->scan_work);
+ }
+ flush_workqueue(local->hw.workqueue);
+ break;
+ }
+}
+
+/*
+ * Tell whether two interfaces of the given types may share a MAC address.
+ *
+ * A monitor interface may share with anything. Otherwise the allowed
+ * (type1, type2) pairs are AP with WDS or VLAN, WDS with WDS or AP, and
+ * VLAN with AP or VLAN. Returns 1 when allowed, 0 otherwise.
+ */
+static inline int identical_mac_addr_allowed(int type1, int type2)
+{
+	if (type1 == IEEE80211_IF_TYPE_MNTR ||
+	    type2 == IEEE80211_IF_TYPE_MNTR)
+		return 1;
+
+	switch (type1) {
+	case IEEE80211_IF_TYPE_AP:
+		return type2 == IEEE80211_IF_TYPE_WDS ||
+		       type2 == IEEE80211_IF_TYPE_VLAN;
+	case IEEE80211_IF_TYPE_WDS:
+		return type2 == IEEE80211_IF_TYPE_WDS ||
+		       type2 == IEEE80211_IF_TYPE_AP;
+	case IEEE80211_IF_TYPE_VLAN:
+		return type2 == IEEE80211_IF_TYPE_AP ||
+		       type2 == IEEE80211_IF_TYPE_VLAN;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Open handler for the master device: succeeds only while at least one
+ * other sub-interface is running. Returns 0 in that case and -EOPNOTSUPP
+ * otherwise.
+ */
+static int ieee80211_master_open(struct net_device *dev)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *cur;
+	int have_running = 0;
+
+	read_lock(&local->sub_if_lock);
+	list_for_each_entry(cur, &local->sub_if_list, list) {
+		if (cur->dev == dev)
+			continue;
+		if (!netif_running(cur->dev))
+			continue;
+		have_running = 1;
+		break;
+	}
+	read_unlock(&local->sub_if_lock);
+
+	return have_running ? 0 : -EOPNOTSUPP;
+}
+
+/*
+ * Stop handler for the master device: closes every other sub-interface
+ * that is still running. Always returns 0.
+ *
+ * NOTE(review): dev_close() is called while sub_if_lock is read-held;
+ * confirm that dev_close() and the stop handlers it invokes are safe to
+ * run in that context.
+ */
+static int ieee80211_master_stop(struct net_device *dev)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata;
+
+ read_lock(&local->sub_if_lock);
+ list_for_each_entry(sdata, &local->sub_if_list, list)
+ if (sdata->dev != dev && netif_running(sdata->dev))
+ dev_close(sdata->dev);
+ read_unlock(&local->sub_if_lock);
+
+ return 0;
+}
+
+/*
+ * Open handler for the management device: allowed only while the master
+ * device is running. Returns 0 then, -EOPNOTSUPP otherwise.
+ */
+static int ieee80211_mgmt_open(struct net_device *dev)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	return netif_running(local->mdev) ? 0 : -EOPNOTSUPP;
+}
+
+/* Stop handler for the management device: nothing to do, always returns 0. */
+static int ieee80211_mgmt_stop(struct net_device *dev)
+{
+ return 0;
+}
+
+/*
+ * Switch running monitor interfaces to "soft monitor" mode if necessary.
+ *
+ * When every open interface is a monitor, the hardware cannot monitor
+ * during operation and the driver has a remove_interface callback, that
+ * callback is invoked for the monitor pseudo-interface (if_id -1).
+ */
+static inline void ieee80211_start_soft_monitor(struct ieee80211_local *local)
+{
+	struct ieee80211_if_init_conf conf;
+
+	if (!local->open_count || local->open_count != local->monitors)
+		return;
+	if (local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)
+		return;
+	if (!local->ops->remove_interface)
+		return;
+
+	conf.if_id = -1;
+	conf.type = IEEE80211_IF_TYPE_MNTR;
+	conf.mac_addr = NULL;
+	local->ops->remove_interface(local_to_hw(local), &conf);
+}
+
+/*
+ * Switch running monitor interfaces to "hard monitor" mode if necessary.
+ *
+ * When every open interface is a monitor, the hardware cannot monitor
+ * during operation and the driver has an add_interface callback, that
+ * callback is invoked for the monitor pseudo-interface (if_id -1).
+ */
+static void ieee80211_start_hard_monitor(struct ieee80211_local *local)
+{
+	struct ieee80211_if_init_conf conf;
+
+	if (!local->open_count || local->open_count != local->monitors)
+		return;
+	if (local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)
+		return;
+	if (!local->ops->add_interface)
+		return;
+
+	conf.if_id = -1;
+	conf.type = IEEE80211_IF_TYPE_MNTR;
+	conf.mac_addr = NULL;
+	local->ops->add_interface(local_to_hw(local), &conf);
+}
+
+/*
+ * Open handler for a sub-interface.
+ *
+ * Rejects the open with -ENOTUNIQ when another running interface uses the
+ * same MAC address and the two types may not share one, and with -ENOLINK
+ * for a WDS interface without a remote address. A monitor interface opened
+ * next to other interfaces on hardware that cannot monitor during
+ * operation runs as a "soft monitor" and is not announced to the driver.
+ * Otherwise the interface is added to the driver and, if it is the first
+ * one, the hardware, the master device and the management device are
+ * started as well.
+ *
+ * Returns 0 on success or a negative errno.
+ *
+ * The tasklets enabled for the first open are disabled again when bringing
+ * up the hardware fails, so the enable/disable pairs stay balanced with
+ * ieee80211_stop().
+ */
+static int ieee80211_open(struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata, *nsdata;
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_if_init_conf conf;
+	int res;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	read_lock(&local->sub_if_lock);
+	list_for_each_entry(nsdata, &local->sub_if_list, list) {
+		struct net_device *ndev = nsdata->dev;
+
+		if (ndev != dev && ndev != local->mdev && netif_running(ndev) &&
+		    compare_ether_addr(dev->dev_addr, ndev->dev_addr) == 0 &&
+		    !identical_mac_addr_allowed(sdata->type, nsdata->type)) {
+			read_unlock(&local->sub_if_lock);
+			return -ENOTUNIQ;
+		}
+	}
+	read_unlock(&local->sub_if_lock);
+
+	if (sdata->type == IEEE80211_IF_TYPE_WDS &&
+	    is_zero_ether_addr(sdata->u.wds.remote_addr))
+		return -ENOLINK;
+
+	if (sdata->type == IEEE80211_IF_TYPE_MNTR && local->open_count &&
+	    !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) {
+		/* run the interface in a "soft monitor" mode */
+		local->monitors++;
+		local->open_count++;
+		local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP;
+		return 0;
+	}
+	ieee80211_start_soft_monitor(local);
+
+	if (local->ops->add_interface) {
+		conf.if_id = dev->ifindex;
+		conf.type = sdata->type;
+		conf.mac_addr = dev->dev_addr;
+		res = local->ops->add_interface(local_to_hw(local), &conf);
+		if (res) {
+			if (sdata->type == IEEE80211_IF_TYPE_MNTR)
+				ieee80211_start_hard_monitor(local);
+			return res;
+		}
+	} else {
+		/* without add_interface only a single STA interface works */
+		if (sdata->type != IEEE80211_IF_TYPE_STA)
+			return -EOPNOTSUPP;
+		if (local->open_count > 0)
+			return -ENOBUFS;
+	}
+
+	if (local->open_count == 0) {
+		/* first interface: bring up the hardware and master device */
+		res = 0;
+		tasklet_enable(&local->tx_pending_tasklet);
+		tasklet_enable(&local->tasklet);
+		if (local->ops->open)
+			res = local->ops->open(local_to_hw(local));
+		if (res == 0) {
+			res = dev_open(local->mdev);
+			if (res) {
+				if (local->ops->stop)
+					local->ops->stop(local_to_hw(local));
+			} else {
+				res = ieee80211_hw_config(local);
+				if (res && local->ops->stop)
+					local->ops->stop(local_to_hw(local));
+				else if (!res && local->apdev)
+					dev_open(local->apdev);
+			}
+		}
+		if (res) {
+			/* undo the tasklet_enable() calls above */
+			tasklet_disable(&local->tx_pending_tasklet);
+			tasklet_disable(&local->tasklet);
+			if (local->ops->remove_interface)
+				local->ops->remove_interface(local_to_hw(local),
+							     &conf);
+			return res;
+		}
+	}
+	local->open_count++;
+
+	if (sdata->type == IEEE80211_IF_TYPE_MNTR) {
+		local->monitors++;
+		local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP;
+	} else
+		ieee80211_if_config(dev);
+
+	if (sdata->type == IEEE80211_IF_TYPE_STA &&
+	    !local->user_space_mlme)
+		netif_carrier_off(dev);
+
+	netif_start_queue(dev);
+	return 0;
+}
+
+
+/*
+ * Stop handler for a sub-interface. Always returns 0.
+ *
+ * A monitor interface running next to other interfaces on hardware that
+ * cannot monitor during operation is a "soft monitor": only the counters
+ * are updated for it. Otherwise the interface is shut down and removed
+ * from the driver, and when it was the last open interface the master
+ * device, the management device, the hardware and the tasklets are
+ * stopped too.
+ */
+static int ieee80211_stop(struct net_device *dev)
+{
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ if (sdata->type == IEEE80211_IF_TYPE_MNTR &&
+ local->open_count > 1 &&
+ !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) {
+ /* remove "soft monitor" interface */
+ local->open_count--;
+ local->monitors--;
+ if (!local->monitors)
+ local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP;
+ return 0;
+ }
+
+ netif_stop_queue(dev);
+ ieee80211_if_shutdown(dev);
+
+ if (sdata->type == IEEE80211_IF_TYPE_MNTR) {
+ local->monitors--;
+ if (!local->monitors)
+ local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP;
+ }
+
+ local->open_count--;
+ if (local->open_count == 0) {
+ /* last interface gone: take down the shared devices and hardware */
+ if (netif_running(local->mdev))
+ dev_close(local->mdev);
+ if (local->apdev)
+ dev_close(local->apdev);
+ if (local->ops->stop)
+ local->ops->stop(local_to_hw(local));
+ tasklet_disable(&local->tx_pending_tasklet);
+ tasklet_disable(&local->tasklet);
+ }
+ if (local->ops->remove_interface) {
+ struct ieee80211_if_init_conf conf;
+
+ conf.if_id = dev->ifindex;
+ conf.type = sdata->type;
+ conf.mac_addr = dev->dev_addr;
+ local->ops->remove_interface(local_to_hw(local), &conf);
+ }
+
+ /* If only monitor interfaces remain, they may go back to hard
+ * monitor mode. */
+ ieee80211_start_hard_monitor(local);
+
+ return 0;
+}
+
+
+/*
+ * Copy the hardware address found 10 bytes into the MAC header (addr2 of
+ * an 802.11 header) into @haddr. Returns the address length, ETH_ALEN.
+ */
+static int header_parse_80211(struct sk_buff *skb, unsigned char *haddr)
+{
+ memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */
+ return ETH_ALEN;
+}
+
+/*
+ * Return non-zero when @raddr equals @addr or is the broadcast address,
+ * zero otherwise.
+ */
+static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
+{
+	if (compare_ether_addr(raddr, addr) == 0)
+		return 1;
+
+	return is_broadcast_ether_addr(raddr) ? 1 : 0;
+}
+
+
+/*
+ * RX handler for data frames.
+ *
+ * Converts the IEEE 802.11 header (and the RFC 1042 / bridge-tunnel
+ * encapsulation, when present) into an Ethernet header, then hands the
+ * frame to the local network stack and/or, on AP and VLAN interfaces with
+ * bridging enabled, sends it back out to the wireless medium.
+ *
+ * Returns TXRX_CONTINUE for non-data frames, TXRX_DROP for frames without
+ * a data payload, frames whose ToDS/FromDS combination does not fit the
+ * interface type and frames too short to carry an 8-byte payload header,
+ * and TXRX_QUEUED once the frame has been consumed.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
+{
+	struct net_device *dev = rx->dev;
+	struct ieee80211_local *local = rx->local;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
+	u16 fc, hdrlen, ethertype;
+	u8 *payload;
+	u8 dst[ETH_ALEN];
+	u8 src[ETH_ALEN];
+	struct sk_buff *skb = rx->skb, *skb2;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	fc = rx->fc;
+	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
+		return TXRX_CONTINUE;
+
+	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
+		return TXRX_DROP;
+
+	hdrlen = ieee80211_get_hdrlen(fc);
+
+	/* convert IEEE 802.11 header + possible LLC headers into Ethernet
+	 * header
+	 * IEEE 802.11 address fields:
+	 * ToDS FromDS Addr1 Addr2 Addr3 Addr4
+	 *   0     0   DA    SA    BSSID n/a
+	 *   0     1   DA    BSSID SA    n/a
+	 *   1     0   BSSID SA    DA    n/a
+	 *   1     1   RA    TA    DA    SA
+	 */
+
+	switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
+	case IEEE80211_FCTL_TODS:
+		/* BSSID SA DA */
+		memcpy(dst, hdr->addr3, ETH_ALEN);
+		memcpy(src, hdr->addr2, ETH_ALEN);
+
+		if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP &&
+			     sdata->type != IEEE80211_IF_TYPE_VLAN)) {
+			printk(KERN_DEBUG "%s: dropped ToDS frame (BSSID="
+			       MAC_FMT " SA=" MAC_FMT " DA=" MAC_FMT ")\n",
+			       dev->name, MAC_ARG(hdr->addr1),
+			       MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr3));
+			return TXRX_DROP;
+		}
+		break;
+	case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
+		/* RA TA DA SA */
+		memcpy(dst, hdr->addr3, ETH_ALEN);
+		memcpy(src, hdr->addr4, ETH_ALEN);
+
+		if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) {
+			printk(KERN_DEBUG "%s: dropped FromDS&ToDS frame (RA="
+			       MAC_FMT " TA=" MAC_FMT " DA=" MAC_FMT " SA="
+			       MAC_FMT ")\n",
+			       rx->dev->name, MAC_ARG(hdr->addr1),
+			       MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr3),
+			       MAC_ARG(hdr->addr4));
+			return TXRX_DROP;
+		}
+		break;
+	case IEEE80211_FCTL_FROMDS:
+		/* DA BSSID SA */
+		memcpy(dst, hdr->addr1, ETH_ALEN);
+		memcpy(src, hdr->addr3, ETH_ALEN);
+
+		if (sdata->type != IEEE80211_IF_TYPE_STA) {
+			return TXRX_DROP;
+		}
+		break;
+	case 0:
+		/* DA SA BSSID */
+		memcpy(dst, hdr->addr1, ETH_ALEN);
+		memcpy(src, hdr->addr2, ETH_ALEN);
+
+		if (sdata->type != IEEE80211_IF_TYPE_IBSS) {
+			if (net_ratelimit()) {
+				printk(KERN_DEBUG "%s: dropped IBSS frame (DA="
+				       MAC_FMT " SA=" MAC_FMT " BSSID=" MAC_FMT
+				       ")\n",
+				       dev->name, MAC_ARG(hdr->addr1),
+				       MAC_ARG(hdr->addr2),
+				       MAC_ARG(hdr->addr3));
+			}
+			return TXRX_DROP;
+		}
+		break;
+	}
+
+	payload = skb->data + hdrlen;
+
+	/* Compare without subtracting: skb->len is unsigned, so
+	 * "skb->len - hdrlen" would wrap around for a frame shorter than its
+	 * header and let it through to the payload reads below. */
+	if (unlikely(skb->len < hdrlen + 8)) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "%s: RX too short data frame "
+			       "payload\n", dev->name);
+		}
+		return TXRX_DROP;
+	}
+
+	ethertype = (payload[6] << 8) | payload[7];
+
+	if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
+		    ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
+		   compare_ether_addr(payload, bridge_tunnel_header) == 0)) {
+		/* remove RFC1042 or Bridge-Tunnel encapsulation and
+		 * replace EtherType */
+		skb_pull(skb, hdrlen + 6);
+		memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN);
+		memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN);
+	} else {
+		/* no known encapsulation: build an 802.3 header whose
+		 * type/length field carries the payload length */
+		struct ethhdr *ehdr;
+		__be16 len;
+		skb_pull(skb, hdrlen);
+		len = htons(skb->len);
+		ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr));
+		memcpy(ehdr->h_dest, dst, ETH_ALEN);
+		memcpy(ehdr->h_source, src, ETH_ALEN);
+		ehdr->h_proto = len;
+	}
+	skb->dev = dev;
+
+	skb2 = NULL;
+
+	sdata->stats.rx_packets++;
+	sdata->stats.rx_bytes += skb->len;
+
+	if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP
+	    || sdata->type == IEEE80211_IF_TYPE_VLAN) && rx->u.rx.ra_match) {
+		if (is_multicast_ether_addr(skb->data)) {
+			/* send multicast frames both to higher layers in
+			 * local net stack and back to the wireless media */
+			skb2 = skb_copy(skb, GFP_ATOMIC);
+			if (!skb2)
+				printk(KERN_DEBUG "%s: failed to clone "
+				       "multicast frame\n", dev->name);
+		} else {
+			struct sta_info *dsta;
+			dsta = sta_info_get(local, skb->data);
+			if (dsta && !dsta->dev) {
+				printk(KERN_DEBUG "Station with null dev "
+				       "structure!\n");
+			} else if (dsta && dsta->dev == dev) {
+				/* Destination station is associated to this
+				 * AP, so send the frame directly to it and
+				 * do not pass the frame to local net stack.
+				 */
+				skb2 = skb;
+				skb = NULL;
+			}
+			if (dsta)
+				sta_info_put(dsta);
+		}
+	}
+
+	if (skb) {
+		/* deliver to local stack */
+		skb->protocol = eth_type_trans(skb, dev);
+		memset(skb->cb, 0, sizeof(skb->cb));
+		netif_rx(skb);
+	}
+
+	if (skb2) {
+		/* send to wireless media */
+		skb2->protocol = __constant_htons(ETH_P_802_3);
+		skb_set_network_header(skb2, 0);
+		skb_set_mac_header(skb2, 0);
+		dev_queue_xmit(skb2);
+	}
+
+	return TXRX_QUEUED;
+}
+
+
+/*
+ * Look up the rate entry of hardware mode @phymode whose hardware value
+ * equals @hw_rate. The alternative value (val2) is considered only for
+ * rates flagged IEEE80211_RATE_PREAMBLE2. Returns NULL when nothing
+ * matches.
+ */
+static struct ieee80211_rate *
+ieee80211_get_rate(struct ieee80211_local *local, int phymode, int hw_rate)
+{
+	struct ieee80211_hw_mode *hw_mode;
+	int idx;
+
+	list_for_each_entry(hw_mode, &local->modes_list, list) {
+		if (hw_mode->mode != phymode)
+			continue;
+
+		for (idx = 0; idx < hw_mode->num_rates; idx++) {
+			struct ieee80211_rate *cand = &hw_mode->rates[idx];
+
+			if (cand->val == hw_rate)
+				return cand;
+			if ((cand->flags & IEEE80211_RATE_PREAMBLE2) &&
+			    cand->val2 == hw_rate)
+				return cand;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Fill the frame info header @fi that precedes frames passed to user
+ * space.
+ *
+ * With a non-NULL @status the fields are derived from the RX status (host
+ * time, MAC time, PHY type, channel, data rate, preamble, antenna, signal).
+ * With a NULL @status everything except msg_type is zeroed and the SSI
+ * type is set to "none". Version and length are always set; msg_type is
+ * left for the caller to assign.
+ */
+static void
+ieee80211_fill_frame_info(struct ieee80211_local *local,
+ struct ieee80211_frame_info *fi,
+ struct ieee80211_rx_status *status)
+{
+ if (status) {
+ struct timespec ts;
+ struct ieee80211_rate *rate;
+
+ /* host time in microseconds, derived from jiffies */
+ jiffies_to_timespec(jiffies, &ts);
+ fi->hosttime = cpu_to_be64((u64) ts.tv_sec * 1000000 +
+ ts.tv_nsec / 1000);
+ fi->mactime = cpu_to_be64(status->mactime);
+ switch (status->phymode) {
+ case MODE_IEEE80211A:
+ fi->phytype = htonl(ieee80211_phytype_ofdm_dot11_a);
+ break;
+ case MODE_IEEE80211B:
+ fi->phytype = htonl(ieee80211_phytype_dsss_dot11_b);
+ break;
+ case MODE_IEEE80211G:
+ fi->phytype = htonl(ieee80211_phytype_pbcc_dot11_g);
+ break;
+ case MODE_ATHEROS_TURBO:
+ fi->phytype =
+ htonl(ieee80211_phytype_dsss_dot11_turbo);
+ break;
+ default:
+ fi->phytype = htonl(0xAAAAAAAA);
+ break;
+ }
+ fi->channel = htonl(status->channel);
+ rate = ieee80211_get_rate(local, status->phymode,
+ status->rate);
+ if (rate) {
+ fi->datarate = htonl(rate->rate);
+ /* For rates with two hardware values, the value that
+ * matched tells which preamble was used. */
+ if (rate->flags & IEEE80211_RATE_PREAMBLE2) {
+ if (status->rate == rate->val)
+ fi->preamble = htonl(2); /* long */
+ else if (status->rate == rate->val2)
+ fi->preamble = htonl(1); /* short */
+ } else
+ fi->preamble = htonl(0);
+ } else {
+ fi->datarate = htonl(0);
+ fi->preamble = htonl(0);
+ }
+
+ fi->antenna = htonl(status->antenna);
+ fi->priority = htonl(0xffffffff); /* no clue */
+ fi->ssi_type = htonl(ieee80211_ssi_raw);
+ fi->ssi_signal = htonl(status->ssi);
+ fi->ssi_noise = 0x00000000;
+ fi->encoding = 0;
+ } else {
+ /* clear everything because we really don't know.
+ * the msg_type field isn't present on monitor frames
+ * so we don't know whether it will be present or not,
+ * but it's ok to not clear it since it'll be assigned
+ * anyway */
+ memset(fi, 0, sizeof(*fi) - sizeof(fi->msg_type));
+
+ fi->ssi_type = htonl(ieee80211_ssi_none);
+ }
+ fi->version = htonl(IEEE80211_FI_VERSION);
+ /* the advertised length excludes the trailing msg_type field */
+ fi->length = cpu_to_be32(sizeof(*fi) - sizeof(fi->msg_type));
+}
+
+/*
+ * Deliver a frame to user space through the management device
+ * (local->apdev), prefixed with a frame info header.
+ *
+ * This routine is not only used for received management frames, but also
+ * for pushing fake 'management' frames into userspace. It shall be
+ * replaced by a netlink-based system.
+ *
+ * @status may be NULL, in which case the frame info header carries no RX
+ * information. @msg_type is stored in the header's msg_type field. The skb
+ * is consumed: it is either passed to netif_rx() or freed when its
+ * headroom cannot be expanded.
+ */
+void
+ieee80211_rx_mgmt(struct ieee80211_local *local, struct sk_buff *skb,
+ struct ieee80211_rx_status *status, u32 msg_type)
+{
+ struct ieee80211_frame_info *fi;
+ const size_t hlen = sizeof(struct ieee80211_frame_info);
+ struct ieee80211_sub_if_data *sdata;
+
+ skb->dev = local->apdev;
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(local->apdev);
+
+ /* make room in front of the frame for the frame info header */
+ if (skb_headroom(skb) < hlen) {
+ I802_DEBUG_INC(local->rx_expand_skb_head);
+ if (pskb_expand_head(skb, hlen, 0, GFP_ATOMIC)) {
+ dev_kfree_skb(skb);
+ return;
+ }
+ }
+
+ fi = (struct ieee80211_frame_info *) skb_push(skb, hlen);
+
+ ieee80211_fill_frame_info(local, fi, status);
+ fi->msg_type = htonl(msg_type);
+
+ sdata->stats.rx_packets++;
+ sdata->stats.rx_bytes += skb->len;
+
+ skb_set_mac_header(skb, 0);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->pkt_type = PACKET_OTHERHOST;
+ skb->protocol = htons(ETH_P_802_2);
+ memset(skb->cb, 0, sizeof(skb->cb));
+ netif_rx(skb);
+}
+
+/*
+ * Deliver a received frame to the monitor interface @dev.
+ *
+ * Unless @status has RX_FLAG_RADIOTAP set, a radiotap header with flags,
+ * rate, channel and antenna signal is prepended first. The skb is
+ * consumed: it is either passed to netif_rx() or freed when its headroom
+ * cannot be expanded.
+ */
+static void
+ieee80211_rx_monitor(struct net_device *dev, struct sk_buff *skb,
+ struct ieee80211_rx_status *status)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_rate *rate;
+ /* radiotap header followed by the fields announced in it_present */
+ struct ieee80211_rtap_hdr {
+ struct ieee80211_radiotap_header hdr;
+ u8 flags;
+ u8 rate;
+ __le16 chan_freq;
+ __le16 chan_flags;
+ u8 antsignal;
+ } __attribute__ ((packed)) *rthdr;
+
+ skb->dev = dev;
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ /* skip header generation when the status flags the frame as radiotap */
+ if (status->flag & RX_FLAG_RADIOTAP)
+ goto out;
+
+ if (skb_headroom(skb) < sizeof(*rthdr)) {
+ I802_DEBUG_INC(local->rx_expand_skb_head);
+ if (pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) {
+ dev_kfree_skb(skb);
+ return;
+ }
+ }
+
+ rthdr = (struct ieee80211_rtap_hdr *) skb_push(skb, sizeof(*rthdr));
+ memset(rthdr, 0, sizeof(*rthdr));
+ rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr));
+ rthdr->hdr.it_present =
+ cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) |
+ (1 << IEEE80211_RADIOTAP_RATE) |
+ (1 << IEEE80211_RADIOTAP_CHANNEL) |
+ (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL));
+ rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ?
+ IEEE80211_RADIOTAP_F_FCS : 0;
+ rate = ieee80211_get_rate(local, status->phymode, status->rate);
+ /* rate stays 0 (from the memset) when the hardware rate is unknown */
+ if (rate)
+ rthdr->rate = rate->rate / 5;
+ rthdr->chan_freq = cpu_to_le16(status->freq);
+ rthdr->chan_flags =
+ status->phymode == MODE_IEEE80211A ?
+ cpu_to_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ) :
+ cpu_to_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ);
+ rthdr->antsignal = status->ssi;
+
+ out:
+ sdata->stats.rx_packets++;
+ sdata->stats.rx_bytes += skb->len;
+
+ skb_set_mac_header(skb, 0);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->pkt_type = PACKET_OTHERHOST;
+ skb->protocol = htons(ETH_P_802_2);
+ memset(skb->cb, 0, sizeof(skb->cb));
+ netif_rx(skb);
+}
+
+/*
+ * Report a radar status event to user space through the management
+ * device.
+ *
+ * Returns 0 when there is no management device or the message was handed
+ * over, -ENOMEM when the message buffer cannot be allocated.
+ */
+int ieee80211_radar_status(struct ieee80211_hw *hw, int channel,
+			   int radar, int radar_type)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_radar_info *info;
+	struct sk_buff *msg_skb;
+
+	if (local->apdev == NULL)
+		return 0;
+
+	msg_skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) +
+				sizeof(*info));
+	if (msg_skb == NULL)
+		return -ENOMEM;
+
+	/* headroom for the frame info header pushed by ieee80211_rx_mgmt() */
+	skb_reserve(msg_skb, sizeof(struct ieee80211_frame_info));
+
+	info = (struct ieee80211_radar_info *) skb_put(msg_skb, sizeof(*info));
+	info->channel = channel;
+	info->radar = radar;
+	info->radar_type = radar_type;
+
+	ieee80211_rx_mgmt(local, msg_skb, NULL, ieee80211_msg_radar);
+	return 0;
+}
+EXPORT_SYMBOL(ieee80211_radar_status);
+
+/*
+ * Report the association ID @aid of the station @peer_address to user
+ * space through the management device.
+ *
+ * Returns 0 when there is no management device or the message was handed
+ * over, -ENOMEM when the message buffer cannot be allocated.
+ */
+int ieee80211_set_aid_for_sta(struct ieee80211_hw *hw, u8 *peer_address,
+			      u16 aid)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_msg_set_aid_for_sta *info;
+	struct sk_buff *msg_skb;
+
+	/* unlikely because if this event only happens for APs,
+	 * which require an open ap device. */
+	if (unlikely(local->apdev == NULL))
+		return 0;
+
+	msg_skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) +
+				sizeof(*info));
+	if (msg_skb == NULL)
+		return -ENOMEM;
+
+	/* headroom for the frame info header pushed by ieee80211_rx_mgmt() */
+	skb_reserve(msg_skb, sizeof(struct ieee80211_frame_info));
+
+	info = (struct ieee80211_msg_set_aid_for_sta *)
+		skb_put(msg_skb, sizeof(*info));
+	memcpy(info->sta_address, peer_address, ETH_ALEN);
+	info->aid = aid;
+
+	ieee80211_rx_mgmt(local, msg_skb, NULL, ieee80211_msg_set_aid_for_sta);
+	return 0;
+}
+EXPORT_SYMBOL(ieee80211_set_aid_for_sta);
+
+/*
+ * Mark @sta as being in power save mode: bump the power-save station
+ * count of its BSS (when it has one), set WLAN_STA_PS and clear the
+ * pspoll state. @dev is only used for the debug message.
+ */
+static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta)
+{
+ struct ieee80211_sub_if_data *sdata;
+ sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
+
+ if (sdata->bss)
+ atomic_inc(&sdata->bss->num_sta_ps);
+ sta->flags |= WLAN_STA_PS;
+ sta->pspoll = 0;
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
+ printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d enters power "
+ "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid);
+#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
+}
+
+
+/*
+ * Take @sta out of power save mode and requeue everything that was held
+ * back for it (first the tx_filtered queue, then the ps_tx_buf queue).
+ *
+ * Returns the number of frames handed to dev_queue_xmit().
+ */
+static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sub_if =
+		IEEE80211_DEV_TO_SUB_IF(sta->dev);
+	struct ieee80211_tx_packet_data *tx_info;
+	struct sk_buff *frame;
+	int flushed = 0;
+
+	if (sub_if->bss != NULL)
+		atomic_dec(&sub_if->bss->num_sta_ps);
+
+	sta->flags &= ~(WLAN_STA_PS | WLAN_STA_TIM);
+	sta->pspoll = 0;
+
+	/* Frames were buffered, so the TIM bit for this AID gets cleared. */
+	if (!skb_queue_empty(&sta->ps_tx_buf)) {
+		if (local->ops->set_tim != NULL)
+			local->ops->set_tim(local_to_hw(local), sta->aid, 0);
+		if (sub_if->bss != NULL)
+			bss_tim_clear(local, sub_if->bss, sta->aid);
+	}
+
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
+	printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d exits power "
+	       "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid);
+#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
+
+	/* Filtered frames go out first ... */
+	for (;;) {
+		frame = skb_dequeue(&sta->tx_filtered);
+		if (frame == NULL)
+			break;
+		tx_info = (struct ieee80211_tx_packet_data *) frame->cb;
+		tx_info->requeue = 1;
+		flushed++;
+		dev_queue_xmit(frame);
+	}
+
+	/* ... followed by the power-save buffered ones. */
+	for (;;) {
+		frame = skb_dequeue(&sta->ps_tx_buf);
+		if (frame == NULL)
+			break;
+		tx_info = (struct ieee80211_tx_packet_data *) frame->cb;
+		local->total_ps_buffered--;
+		flushed++;
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
+		printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d send PS frame "
+		       "since STA not sleeping anymore\n", dev->name,
+		       MAC_ARG(sta->addr), sta->aid);
+#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
+		tx_info->requeue = 1;
+		dev_queue_xmit(frame);
+	}
+
+	return flushed;
+}
+
+
+/*
+ * RX handler for PS-Poll control frames.
+ *
+ * Frames that are not a PS-Poll from a known station with a matching
+ * receiver address are passed on (TXRX_CONTINUE).  Otherwise one buffered
+ * frame (tx_filtered first, then ps_tx_buf) is released to the station,
+ * the PS-Poll skb is freed here and TXRX_QUEUED is returned.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_hdr *frame_hdr;
+	struct sk_buff *buffered;
+	int queues_drained;
+
+	if (likely(!rx->sta || !rx->u.rx.ra_match ||
+		   (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL ||
+		   (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL))
+		return TXRX_CONTINUE;
+
+	buffered = skb_dequeue(&rx->sta->tx_filtered);
+	if (buffered == NULL) {
+		buffered = skb_dequeue(&rx->sta->ps_tx_buf);
+		if (buffered != NULL)
+			rx->local->total_ps_buffered--;
+	}
+	queues_drained = skb_queue_empty(&rx->sta->tx_filtered) &&
+		skb_queue_empty(&rx->sta->ps_tx_buf);
+
+	if (buffered == NULL) {
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
+		if (!rx->u.rx.sent_ps_buffered)
+			printk(KERN_DEBUG "%s: STA " MAC_FMT " sent PS Poll even "
+			       "though there is no buffered frames for it\n",
+			       rx->dev->name, MAC_ARG(rx->sta->addr));
+#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
+		goto free_poll;
+	}
+
+	frame_hdr = (struct ieee80211_hdr *) buffered->data;
+
+	/* tell TX path to send one frame even though the STA may
+	 * still remain in PS mode after this frame exchange */
+	rx->sta->pspoll = 1;
+
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
+	printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS Poll (entries "
+	       "after %d)\n",
+	       MAC_ARG(rx->sta->addr), rx->sta->aid,
+	       skb_queue_len(&rx->sta->ps_tx_buf));
+#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
+
+	/* The MoreData flag tells the STA whether further frames are
+	 * still buffered for it. */
+	if (queues_drained) {
+		frame_hdr->frame_control &=
+			cpu_to_le16(~IEEE80211_FCTL_MOREDATA);
+		rx->sta->flags &= ~WLAN_STA_TIM;
+	} else {
+		frame_hdr->frame_control |=
+			cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+	}
+
+	dev_queue_xmit(buffered);
+
+	if (queues_drained) {
+		if (rx->local->ops->set_tim != NULL)
+			rx->local->ops->set_tim(local_to_hw(rx->local),
+						rx->sta->aid, 0);
+		if (rx->sdata->bss != NULL)
+			bss_tim_clear(rx->local, rx->sdata->bss,
+				      rx->sta->aid);
+	}
+
+ free_poll:
+	/* Free the PS Poll skb here instead of returning TXRX_DROP, which
+	 * would count it as a dropped frame. */
+	dev_kfree_skb(rx->skb);
+
+	return TXRX_QUEUED;
+}
+
+
+/*
+ * Claim the next slot of the per-interface fragment table for a new
+ * frame whose first fragment is *skb.
+ *
+ * The slot is chosen round-robin via sdata->fragment_next; whatever was
+ * still pending in that slot is purged.  The skb is queued on the entry
+ * and *skb is set to NULL.  Returns the initialised entry.
+ */
+static inline struct ieee80211_fragment_entry *
+ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
+			 unsigned int frag, unsigned int seq, int rx_queue,
+			 struct sk_buff **skb)
+{
+	int slot = sdata->fragment_next;
+	struct ieee80211_fragment_entry *entry = &sdata->fragments[slot];
+
+	/* Advance the cursor, wrapping at the end of the table. */
+	sdata->fragment_next = slot + 1;
+	if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX)
+		sdata->fragment_next = 0;
+
+	if (!skb_queue_empty(&entry->skb_list)) {
+#ifdef CONFIG_MAC80211_DEBUG
+		struct ieee80211_hdr *old_hdr =
+			(struct ieee80211_hdr *) entry->skb_list.next->data;
+		printk(KERN_DEBUG "%s: RX reassembly removed oldest "
+		       "fragment entry (idx=%d age=%lu seq=%d last_frag=%d "
+		       "addr1=" MAC_FMT " addr2=" MAC_FMT "\n",
+		       sdata->dev->name, slot,
+		       jiffies - entry->first_frag_time, entry->seq,
+		       entry->last_frag, MAC_ARG(old_hdr->addr1),
+		       MAC_ARG(old_hdr->addr2));
+#endif /* CONFIG_MAC80211_DEBUG */
+		__skb_queue_purge(&entry->skb_list);
+	}
+
+	/* The unlocked queue helper is used; no locking is taken here. */
+	__skb_queue_tail(&entry->skb_list, *skb);
+	*skb = NULL;
+
+	entry->seq = seq;
+	entry->last_frag = frag;
+	entry->rx_queue = rx_queue;
+	entry->first_frag_time = jiffies;
+	entry->extra_len = 0;
+	entry->ccmp = 0;
+
+	return entry;
+}
+
+
+/*
+ * Look up the pending reassembly entry that fragment number @frag of
+ * sequence @seq on @rx_queue belongs to.
+ *
+ * The table is walked backwards starting at the slot before
+ * sdata->fragment_next.  An entry matches when it is non-empty, has the
+ * same sequence number and RX queue, expects @frag as its next fragment,
+ * and its first fragment has the same frame type, addr1 and addr2 as
+ * @hdr.  A matching entry whose first fragment is more than 2 seconds
+ * old is purged and skipped.
+ *
+ * Returns the matching entry, or NULL when there is none.
+ */
+static inline struct ieee80211_fragment_entry *
+ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
+			  u16 fc, unsigned int frag, unsigned int seq,
+			  int rx_queue, struct ieee80211_hdr *hdr)
+{
+	struct ieee80211_fragment_entry *entry;
+	int i, idx;
+
+	idx = sdata->fragment_next;
+	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
+		struct ieee80211_hdr *f_hdr;
+		u16 f_fc;
+
+		idx--;
+		if (idx < 0)
+			idx = IEEE80211_FRAGMENT_MAX - 1;
+
+		entry = &sdata->fragments[idx];
+		if (skb_queue_empty(&entry->skb_list) || entry->seq != seq ||
+		    entry->rx_queue != rx_queue ||
+		    entry->last_frag + 1 != frag)
+			continue;
+
+		f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data;
+		f_fc = le16_to_cpu(f_hdr->frame_control);
+
+		if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) ||
+		    compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 ||
+		    compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0)
+			continue;
+
+		/* time_after() keeps the age check correct across a jiffies
+		 * wraparound, unlike a plain '<' comparison. */
+		if (time_after(jiffies, entry->first_frag_time + 2 * HZ)) {
+			__skb_queue_purge(&entry->skb_list);
+			continue;
+		}
+		return entry;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * RX handler: reassemble fragmented unicast frames.
+ *
+ * Unfragmented, too short (< 24 bytes) and multicast frames only update
+ * the receive statistics and continue.  A first fragment (frag == 0)
+ * opens a new reassembly entry and is queued.  Later fragments are
+ * appended to their entry; for CCMP-protected frames the packet number
+ * must be exactly one higher than the previous fragment's.  When the
+ * final fragment arrives, all payloads are copied behind the first
+ * fragment, which then continues through the handler chain as rx->skb.
+ *
+ * Returns TXRX_QUEUED while fragments are pending, TXRX_DROP when a
+ * fragment cannot be matched or verified or memory runs out, and
+ * TXRX_CONTINUE for complete frames.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_hdr *hdr;
+	u16 sc;
+	unsigned int frag, seq;
+	struct ieee80211_fragment_entry *entry;
+	struct sk_buff *skb;
+
+	hdr = (struct ieee80211_hdr *) rx->skb->data;
+	sc = le16_to_cpu(hdr->seq_ctrl);
+	frag = sc & IEEE80211_SCTL_FRAG;
+
+	if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) ||
+		   (rx->skb)->len < 24 ||
+		   is_multicast_ether_addr(hdr->addr1))) {
+		/* not fragmented */
+		goto out;
+	}
+	I802_DEBUG_INC(rx->local->rx_handlers_fragments);
+
+	seq = (sc & IEEE80211_SCTL_SEQ) >> 4;
+
+	if (frag == 0) {
+		/* This is the first fragment of a new frame. */
+		entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
+						 rx->u.rx.queue, &(rx->skb));
+		if (rx->key && rx->key->alg == ALG_CCMP &&
+		    (rx->fc & IEEE80211_FCTL_PROTECTED)) {
+			/* Store CCMP PN so that we can verify that the next
+			 * fragment has a sequential PN value. */
+			entry->ccmp = 1;
+			memcpy(entry->last_pn,
+			       rx->key->u.ccmp.rx_pn[rx->u.rx.queue],
+			       CCMP_PN_LEN);
+		}
+		return TXRX_QUEUED;
+	}
+
+	/* This is a fragment for a frame that should already be pending in
+	 * fragment cache. Add this fragment to the end of the pending entry.
+	 */
+	entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq,
+					  rx->u.rx.queue, hdr);
+	if (!entry) {
+		I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
+		return TXRX_DROP;
+	}
+
+	/* Verify that MPDUs within one MSDU have sequential PN values.
+	 * (IEEE 802.11i, 8.3.3.4.5) */
+	if (entry->ccmp) {
+		int i;
+		u8 pn[CCMP_PN_LEN], *rpn;
+		if (!rx->key || rx->key->alg != ALG_CCMP)
+			return TXRX_DROP;
+		/* Expected PN = last PN + 1, incremented from the last byte
+		 * with carry into the preceding bytes. */
+		memcpy(pn, entry->last_pn, CCMP_PN_LEN);
+		for (i = CCMP_PN_LEN - 1; i >= 0; i--) {
+			pn[i]++;
+			if (pn[i])
+				break;
+		}
+		rpn = rx->key->u.ccmp.rx_pn[rx->u.rx.queue];
+		if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) {
+			printk(KERN_DEBUG "%s: defrag: CCMP PN not sequential"
+			       " A2=" MAC_FMT " PN=%02x%02x%02x%02x%02x%02x "
+			       "(expected %02x%02x%02x%02x%02x%02x)\n",
+			       rx->dev->name, MAC_ARG(hdr->addr2),
+			       rpn[0], rpn[1], rpn[2], rpn[3], rpn[4], rpn[5],
+			       pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]);
+			return TXRX_DROP;
+		}
+		memcpy(entry->last_pn, pn, CCMP_PN_LEN);
+	}
+
+	/* Only the payload of follow-up fragments is kept. */
+	skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc));
+	__skb_queue_tail(&entry->skb_list, rx->skb);
+	entry->last_frag = frag;
+	entry->extra_len += rx->skb->len;
+	if (rx->fc & IEEE80211_FCTL_MOREFRAGS) {
+		rx->skb = NULL;
+		return TXRX_QUEUED;
+	}
+
+	/* Last fragment: the first fragment becomes the reassembled frame. */
+	rx->skb = __skb_dequeue(&entry->skb_list);
+	if (skb_tailroom(rx->skb) < entry->extra_len) {
+		I802_DEBUG_INC(rx->local->rx_expand_skb_head2);
+		if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len,
+					      GFP_ATOMIC))) {
+			I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
+			__skb_queue_purge(&entry->skb_list);
+			return TXRX_DROP;
+		}
+	}
+	while ((skb = __skb_dequeue(&entry->skb_list))) {
+		memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len);
+		/* The payload has been copied; release the fragment so it
+		 * is not leaked. */
+		dev_kfree_skb(skb);
+	}
+
+	/* hdr pointed into the last fragment, which has just been freed.
+	 * Re-point it at the reassembled frame for the checks below. */
+	hdr = (struct ieee80211_hdr *) rx->skb->data;
+
+	/* Complete frame has been reassembled - process it now */
+	rx->fragmented = 1;
+
+ out:
+	if (rx->sta)
+		rx->sta->rx_packets++;
+	if (is_multicast_ether_addr(hdr->addr1))
+		rx->local->dot11MulticastReceivedFrameCount++;
+	else
+		ieee80211_led_rx(rx->local);
+	return TXRX_CONTINUE;
+}
+
+
+/*
+ * RX handler: frames received on a monitor interface are handed to
+ * ieee80211_rx_monitor() and reported as queued.  On any other interface
+ * a radiotap header, if flagged in the RX status, is pulled off and
+ * processing continues.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_monitor(struct ieee80211_txrx_data *rx)
+{
+	if (rx->sdata->type != IEEE80211_IF_TYPE_MNTR) {
+		if (rx->u.rx.status->flag & RX_FLAG_RADIOTAP)
+			skb_pull(rx->skb,
+				 ieee80211_get_radiotap_len(rx->skb));
+		return TXRX_CONTINUE;
+	}
+
+	ieee80211_rx_monitor(rx->dev, rx->skb, rx->u.rx.status);
+	return TXRX_QUEUED;
+}
+
+/*
+ * RX handler performing the basic sanity checks and key selection:
+ *  - drops retransmitted unicast duplicates from a known station (retry
+ *    bit set and same seq_ctrl as the last frame on this RX queue);
+ *  - trims the FCS when the hardware leaves it in the frame;
+ *  - drops frames shorter than 16 bytes;
+ *  - classifies skb->pkt_type from the receiver address;
+ *  - on non-IBSS interfaces, data and PS-Poll frames from stations that
+ *    are not associated are either dropped or passed to
+ *    ieee80211_rx_mgmt() as ieee80211_msg_sta_not_assoc;
+ *  - selects rx->key (station key, default key, or the key named by the
+ *    WEP key index in the frame) and bumps its use counter.
+ *
+ * Returns TXRX_DROP, TXRX_QUEUED (frame handed to ieee80211_rx_mgmt())
+ * or TXRX_CONTINUE.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_check(struct ieee80211_txrx_data *rx)
+{
+ struct ieee80211_hdr *hdr;
+ int always_sta_key;
+ hdr = (struct ieee80211_hdr *) rx->skb->data;
+
+ /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */
+ if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) {
+ if (unlikely(rx->fc & IEEE80211_FCTL_RETRY &&
+ rx->sta->last_seq_ctrl[rx->u.rx.queue] ==
+ hdr->seq_ctrl)) {
+ if (rx->u.rx.ra_match) { /* duplicates are only counted for frames addressed to us */
+ rx->local->dot11FrameDuplicateCount++;
+ rx->sta->num_duplicates++;
+ }
+ return TXRX_DROP;
+ } else
+ rx->sta->last_seq_ctrl[rx->u.rx.queue] = hdr->seq_ctrl;
+ }
+
+ if ((rx->local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) &&
+ rx->skb->len > FCS_LEN)
+ skb_trim(rx->skb, rx->skb->len - FCS_LEN);
+
+ if (unlikely(rx->skb->len < 16)) {
+ I802_DEBUG_INC(rx->local->rx_handlers_drop_short);
+ return TXRX_DROP;
+ }
+
+ if (!rx->u.rx.ra_match)
+ rx->skb->pkt_type = PACKET_OTHERHOST;
+ else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0)
+ rx->skb->pkt_type = PACKET_HOST;
+ else if (is_multicast_ether_addr(hdr->addr1)) {
+ if (is_broadcast_ether_addr(hdr->addr1))
+ rx->skb->pkt_type = PACKET_BROADCAST;
+ else
+ rx->skb->pkt_type = PACKET_MULTICAST;
+ } else
+ rx->skb->pkt_type = PACKET_OTHERHOST;
+
+ /* Drop disallowed frame classes based on STA auth/assoc state;
+ * IEEE 802.11, Chap 5.5.
+ *
+ * 80211.o does filtering only based on association state, i.e., it
+ * drops Class 3 frames from not associated stations. hostapd sends
+ * deauth/disassoc frames when needed. In addition, hostapd is
+ * responsible for filtering on both auth and assoc states.
+ */
+ if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA ||
+ ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL &&
+ (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) &&
+ rx->sdata->type != IEEE80211_IF_TYPE_IBSS &&
+ (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) {
+ if ((!(rx->fc & IEEE80211_FCTL_FROMDS) &&
+ !(rx->fc & IEEE80211_FCTL_TODS) &&
+ (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)
+ || !rx->u.rx.ra_match) {
+ /* Drop IBSS frames and frames for other hosts
+ * silently. */
+ return TXRX_DROP;
+ }
+
+ if (!rx->local->apdev)
+ return TXRX_DROP;
+
+ ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status,
+ ieee80211_msg_sta_not_assoc);
+ return TXRX_QUEUED;
+ }
+
+ if (rx->sdata->type == IEEE80211_IF_TYPE_STA)
+ always_sta_key = 0;
+ else
+ always_sta_key = 1;
+
+ if (rx->sta && rx->sta->key && always_sta_key) {
+ rx->key = rx->sta->key;
+ } else {
+ if (rx->sta && rx->sta->key)
+ rx->key = rx->sta->key;
+ else
+ rx->key = rx->sdata->default_key;
+
+ if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) &&
+ rx->fc & IEEE80211_FCTL_PROTECTED) {
+ int keyidx = ieee80211_wep_get_keyidx(rx->skb);
+
+ if (keyidx >= 0 && keyidx < NUM_DEFAULT_KEYS &&
+ (!rx->sta || !rx->sta->key || keyidx > 0))
+ rx->key = rx->sdata->keys[keyidx];
+
+ if (!rx->key) {
+ if (!rx->u.rx.ra_match)
+ return TXRX_DROP;
+ printk(KERN_DEBUG "%s: RX WEP frame with "
+ "unknown keyidx %d (A1=" MAC_FMT " A2="
+ MAC_FMT " A3=" MAC_FMT ")\n",
+ rx->dev->name, keyidx,
+ MAC_ARG(hdr->addr1),
+ MAC_ARG(hdr->addr2),
+ MAC_ARG(hdr->addr3));
+ if (!rx->local->apdev)
+ return TXRX_DROP;
+ ieee80211_rx_mgmt(
+ rx->local, rx->skb, rx->u.rx.status,
+ ieee80211_msg_wep_frame_unknown_key);
+ return TXRX_QUEUED;
+ }
+ }
+ }
+
+ if (rx->fc & IEEE80211_FCTL_PROTECTED && rx->key && rx->u.rx.ra_match) {
+ rx->key->tx_rx_count++;
+ if (unlikely(rx->local->key_tx_rx_threshold &&
+ rx->key->tx_rx_count >
+ rx->local->key_tx_rx_threshold)) {
+ ieee80211_key_threshold_notify(rx->dev, rx->key,
+ rx->sta);
+ }
+ }
+
+ return TXRX_CONTINUE;
+}
+
+
+/*
+ * RX handler: per-station bookkeeping.
+ *
+ * Refreshes the station's last_rx timestamp, updates its byte/fragment
+ * counters and the running averages of rssi/signal/noise, tracks power
+ * save transitions signalled by the PM bit, and swallows data::nullfunc
+ * frames.  Frames without a station entry are passed on untouched.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
+	struct net_device *netdev = rx->dev;
+	struct sta_info *peer = rx->sta;
+	int refresh_last_rx;
+
+	if (peer == NULL)
+		return TXRX_CONTINUE;
+
+	if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) {
+		/* Update last_rx only for IBSS packets which are for the
+		 * current BSSID, so that STAs using a different BSSID do
+		 * not keep the current IBSS network alive. */
+		u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len);
+
+		refresh_last_rx =
+			compare_ether_addr(bssid,
+					   rx->sdata->u.sta.bssid) == 0;
+	} else {
+		/* Update last_rx only for unicast frames in order to
+		 * prevent the Probe Request frames (the only broadcast
+		 * frames from a STA in infrastructure mode) from keeping a
+		 * connection alive. */
+		refresh_last_rx = !is_multicast_ether_addr(hdr->addr1) ||
+			rx->sdata->type == IEEE80211_IF_TYPE_STA;
+	}
+	if (refresh_last_rx)
+		peer->last_rx = jiffies;
+
+	if (!rx->u.rx.ra_match)
+		return TXRX_CONTINUE;
+
+	peer->rx_fragments++;
+	peer->rx_bytes += rx->skb->len;
+
+	/* Running averages: 15/16 old value, 1/16 new sample. */
+	peer->last_rssi = (peer->last_rssi * 15 +
+			   rx->u.rx.status->ssi) / 16;
+	peer->last_signal = (peer->last_signal * 15 +
+			     rx->u.rx.status->signal) / 16;
+	peer->last_noise = (peer->last_noise * 15 +
+			    rx->u.rx.status->noise) / 16;
+
+	/* Change STA power saving mode only at the end of a frame
+	 * exchange sequence */
+	if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) {
+		int dozing = (peer->flags & WLAN_STA_PS) != 0;
+		int pm_bit = (rx->fc & IEEE80211_FCTL_PM) != 0;
+
+		if (dozing && !pm_bit)
+			rx->u.rx.sent_ps_buffered +=
+				ap_sta_ps_end(netdev, peer);
+		else if (!dozing && pm_bit)
+			ap_sta_ps_start(netdev, peer);
+	}
+
+	/* Drop data::nullfunc frames silently, since they are used only to
+	 * control station power saving mode. */
+	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
+	    (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC)
+		return TXRX_CONTINUE;
+
+	I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc);
+	/* Update the counter and free the packet here to avoid counting
+	 * this as a dropped packet. */
+	peer->rx_packets++;
+	dev_kfree_skb(rx->skb);
+	return TXRX_QUEUED;
+} /* ieee80211_rx_h_sta_process */
+
+
+/*
+ * RX handler: count WEP frames carrying a weak IV per station.
+ *
+ * Only protected data frames addressed to us, from a known station and
+ * with a WEP key, are inspected.  Always returns TXRX_CONTINUE.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_wep_weak_iv_detection(struct ieee80211_txrx_data *rx)
+{
+	if (!rx->sta || !(rx->fc & IEEE80211_FCTL_PROTECTED))
+		return TXRX_CONTINUE;
+	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
+		return TXRX_CONTINUE;
+	if (!rx->key || rx->key->alg != ALG_WEP || !rx->u.rx.ra_match)
+		return TXRX_CONTINUE;
+
+	/* The IV can only be checked if hwaccel did not remove it from
+	 * the frame. */
+	if (!(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) &&
+	    !rx->key->force_sw_encrypt)
+		return TXRX_CONTINUE;
+
+	if (ieee80211_wep_is_weak_iv(rx->skb, rx->key) != NULL)
+		rx->sta->wep_weak_iv_count++;
+
+	return TXRX_CONTINUE;
+}
+
+
+/*
+ * RX handler: WEP decryption of protected data and authentication
+ * frames.
+ *
+ * Frames are decrypted in software unless the RX status says the
+ * hardware already did so (and software crypto is not forced); in that
+ * case only a left-over IV and the 4-byte ICV are stripped.  Returns
+ * TXRX_DROP when no key is set or decryption fails, TXRX_CONTINUE
+ * otherwise.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_wep_decrypt(struct ieee80211_txrx_data *rx)
+{
+	unsigned int ftype = rx->fc & IEEE80211_FCTL_FTYPE;
+	unsigned int stype = rx->fc & IEEE80211_FCTL_STYPE;
+	int wep_capable_frame;
+
+	/* If the device handles decryption totally, skip this test */
+	if (rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)
+		return TXRX_CONTINUE;
+
+	wep_capable_frame = ftype == IEEE80211_FTYPE_DATA ||
+		(ftype == IEEE80211_FTYPE_MGMT &&
+		 stype == IEEE80211_STYPE_AUTH);
+
+	if (rx->key && rx->key->alg != ALG_WEP)
+		return TXRX_CONTINUE;
+	if (!(rx->fc & IEEE80211_FCTL_PROTECTED) || !wep_capable_frame)
+		return TXRX_CONTINUE;
+
+	if (!rx->key) {
+		printk(KERN_DEBUG "%s: RX WEP frame, but no key set\n",
+		       rx->dev->name);
+		return TXRX_DROP;
+	}
+
+	if (rx->key->force_sw_encrypt ||
+	    !(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) {
+		if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) {
+			printk(KERN_DEBUG "%s: RX WEP frame, decrypt "
+			       "failed\n", rx->dev->name);
+			return TXRX_DROP;
+		}
+		return TXRX_CONTINUE;
+	}
+
+	if (rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) {
+		ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
+		/* remove ICV */
+		skb_trim(rx->skb, rx->skb->len - 4);
+	}
+
+	return TXRX_CONTINUE;
+}
+
+
+/*
+ * RX handler: IEEE 802.1X port access control.
+ *
+ * On non-STA interfaces with EAPOL handling enabled, EAPOL frames
+ * addressed to us go to ieee80211_rx_mgmt() (or are dropped when there
+ * is no apdev).  With ieee802_1x enabled, non-EAPOL data frames from
+ * stations that are not authorized are dropped.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx)
+{
+	if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) &&
+	    rx->sdata->type != IEEE80211_IF_TYPE_STA && rx->u.rx.ra_match) {
+		/* Pass both encrypted and unencrypted EAPOL frames to user
+		 * space for processing. */
+		if (rx->local->apdev) {
+			ieee80211_rx_mgmt(rx->local, rx->skb,
+					  rx->u.rx.status,
+					  ieee80211_msg_normal);
+			return TXRX_QUEUED;
+		}
+		return TXRX_DROP;
+	}
+
+	/* Everything below only applies to 802.1X-controlled ports. */
+	if (likely(!rx->sdata->ieee802_1x))
+		return TXRX_CONTINUE;
+
+	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
+	    (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC)
+		return TXRX_CONTINUE;
+
+	if (rx->sta && (rx->sta->flags & WLAN_STA_AUTHORIZED))
+		return TXRX_CONTINUE;
+
+	if (ieee80211_is_eapol(rx->skb))
+		return TXRX_CONTINUE;
+
+#ifdef CONFIG_MAC80211_DEBUG
+	{
+		struct ieee80211_hdr *hdr =
+			(struct ieee80211_hdr *) rx->skb->data;
+		printk(KERN_DEBUG "%s: dropped frame from " MAC_FMT
+		       " (unauthorized port)\n", rx->dev->name,
+		       MAC_ARG(hdr->addr2));
+	}
+#endif /* CONFIG_MAC80211_DEBUG */
+	return TXRX_DROP;
+}
+
+
+/*
+ * RX handler: drop unprotected data frames when encryption is expected,
+ * i.e. when a key is selected or drop_unencrypted is set.  Nullfunc
+ * frames are exempt, and so are EAPOL frames when EAPOL handling is
+ * enabled on the interface.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx)
+{
+	/* If the device handles decryption totally, skip this test */
+	if (rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)
+		return TXRX_CONTINUE;
+
+	if (likely(rx->fc & IEEE80211_FCTL_PROTECTED))
+		return TXRX_CONTINUE;
+
+	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
+	    (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC)
+		return TXRX_CONTINUE;
+
+	if (!rx->key && !rx->sdata->drop_unencrypted)
+		return TXRX_CONTINUE;
+
+	if (rx->sdata->eapol != 0 && ieee80211_is_eapol(rx->skb))
+		return TXRX_CONTINUE;
+
+	printk(KERN_DEBUG "%s: RX non-WEP frame, but expected "
+	       "encryption\n", rx->dev->name);
+	return TXRX_DROP;
+}
+
+
+/*
+ * RX handler: dispatch frames that reached the end of the chain.
+ *
+ * STA and IBSS interfaces without a user space MLME process the frame
+ * through ieee80211_sta_rx_mgmt().  Everything else is passed to
+ * ieee80211_rx_mgmt(), or dropped when there is no apdev.  Frames not
+ * addressed to us are dropped.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_sub_if_data *sub_if;
+	int kernel_mlme;
+
+	if (!rx->u.rx.ra_match)
+		return TXRX_DROP;
+
+	sub_if = IEEE80211_DEV_TO_SUB_IF(rx->dev);
+	kernel_mlme = (sub_if->type == IEEE80211_IF_TYPE_STA ||
+		       sub_if->type == IEEE80211_IF_TYPE_IBSS) &&
+		!rx->local->user_space_mlme;
+
+	if (kernel_mlme) {
+		ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status);
+		return TXRX_QUEUED;
+	}
+
+	/* Management frames are sent to hostapd for processing */
+	if (!rx->local->apdev)
+		return TXRX_DROP;
+
+	ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status,
+			  ieee80211_msg_normal);
+	return TXRX_QUEUED;
+}
+
+
+/*
+ * RX handler: while a scan is running, every frame is handed to
+ * ieee80211_sta_rx_scan().  A frame that was flagged in_scan on entry
+ * but finds the scan already over is dropped.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_local *local = rx->local;
+
+	if (unlikely(local->sta_scanning != 0)) {
+		ieee80211_sta_rx_scan(rx->dev, rx->skb, rx->u.rx.status);
+		return TXRX_QUEUED;
+	}
+
+	if (likely(!rx->u.rx.in_scan))
+		return TXRX_CONTINUE;
+
+	/* scanning finished during invoking of handlers */
+	I802_DEBUG_INC(local->rx_handlers_drop_passive_scan);
+	return TXRX_DROP;
+}
+
+/*
+ * Handle a Michael MIC failure reported by the hardware for rx->skb.
+ *
+ * Reports considered spurious are logged and ignored: unknown sender
+ * (no sta entry), frame without the protected bit, non-zero key index on
+ * an AP interface when the IV is still in the frame, or a frame type
+ * other than data / authentication.  Otherwise an IWEVCUSTOM wireless
+ * event "MLME-MICHAELMICFAILURE.indication(...)" is sent and the frame is
+ * passed to ieee80211_rx_mgmt() as ieee80211_msg_michael_mic_failure.
+ *
+ * On every ignore path, and when there is no apdev, the skb is freed
+ * here and rx->skb is set to NULL.
+ */
+static void ieee80211_rx_michael_mic_report(struct net_device *dev,
+ struct ieee80211_hdr *hdr,
+ struct sta_info *sta,
+ struct ieee80211_txrx_data *rx)
+{
+ int keyidx, hdrlen;
+
+ hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb);
+ if (rx->skb->len >= hdrlen + 4)
+ keyidx = rx->skb->data[hdrlen + 3] >> 6;
+ else
+ keyidx = -1; /* frame too short to read the key index byte */
+
+ /* TODO: verify that this is not triggered by fragmented
+ * frames (hw does not verify MIC for them). */
+ printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC "
+ "failure from " MAC_FMT " to " MAC_FMT " keyidx=%d\n",
+ dev->name, MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr1), keyidx);
+
+ if (!sta) {
+ /* Some hardware versions seem to generate incorrect
+ * Michael MIC reports; ignore them to avoid triggering
+ * countermeasures. */
+ printk(KERN_DEBUG "%s: ignored spurious Michael MIC "
+ "error for unknown address " MAC_FMT "\n",
+ dev->name, MAC_ARG(hdr->addr2));
+ goto ignore;
+ }
+
+ if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) {
+ printk(KERN_DEBUG "%s: ignored spurious Michael MIC "
+ "error for a frame with no ISWEP flag (src "
+ MAC_FMT ")\n", dev->name, MAC_ARG(hdr->addr2));
+ goto ignore;
+ }
+
+ if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) &&
+ rx->sdata->type == IEEE80211_IF_TYPE_AP) {
+ keyidx = ieee80211_wep_get_keyidx(rx->skb);
+ /* AP with Pairwise keys support should never receive Michael
+ * MIC errors for non-zero keyidx because these are reserved
+ * for group keys and only the AP is sending real multicast
+ * frames in BSS. */
+ if (keyidx) {
+ printk(KERN_DEBUG "%s: ignored Michael MIC error for "
+ "a frame with non-zero keyidx (%d) (src " MAC_FMT
+ ")\n", dev->name, keyidx, MAC_ARG(hdr->addr2));
+ goto ignore;
+ }
+ }
+
+ if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA &&
+ ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
+ (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) {
+ printk(KERN_DEBUG "%s: ignored spurious Michael MIC "
+ "error for a frame that cannot be encrypted "
+ "(fc=0x%04x) (src " MAC_FMT ")\n",
+ dev->name, rx->fc, MAC_ARG(hdr->addr2));
+ goto ignore;
+ }
+
+ do {
+ union iwreq_data wrqu;
+ char *buf = kmalloc(128, GFP_ATOMIC);
+ if (!buf)
+ break; /* no memory: the wireless event is skipped, the report below still happens */
+
+ /* TODO: needed parameters: count, key type, TSC */
+ sprintf(buf, "MLME-MICHAELMICFAILURE.indication("
+ "keyid=%d %scast addr=" MAC_FMT ")",
+ keyidx, hdr->addr1[0] & 0x01 ? "broad" : "uni",
+ MAC_ARG(hdr->addr2));
+ memset(&wrqu, 0, sizeof(wrqu));
+ wrqu.data.length = strlen(buf);
+ wireless_send_event(rx->dev, IWEVCUSTOM, &wrqu, buf);
+ kfree(buf);
+ } while (0);
+
+ /* TODO: consider verifying the MIC error report with software
+ * implementation if we get too many spurious reports from the
+ * hardware. */
+ if (!rx->local->apdev)
+ goto ignore;
+ ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status,
+ ieee80211_msg_michael_mic_failure);
+ return;
+
+ ignore:
+ dev_kfree_skb(rx->skb);
+ rx->skb = NULL;
+}
+
+/*
+ * Run the NULL-terminated @handlers array on @rx until one of them
+ * returns something other than TXRX_CONTINUE.
+ *
+ * A handler-issued drop bumps the drop counters, a handler-issued queue
+ * bumps the queued counter.  If the final result is TXRX_DROP (which is
+ * also the result for an empty handler list) rx->skb is freed here.
+ * Returns the final result.
+ */
+static inline ieee80211_txrx_result __ieee80211_invoke_rx_handlers(
+				struct ieee80211_local *local,
+				ieee80211_rx_handler *handlers,
+				struct ieee80211_txrx_data *rx,
+				struct sta_info *sta)
+{
+	ieee80211_txrx_result verdict = TXRX_DROP;
+	ieee80211_rx_handler *cur;
+	int stopped = 0;
+
+	for (cur = handlers; *cur != NULL; cur++) {
+		verdict = (*cur)(rx);
+		if (verdict != TXRX_CONTINUE) {
+			stopped = 1;
+			break;
+		}
+	}
+
+	/* Statistics are only touched when a handler ended the walk. */
+	if (stopped && verdict == TXRX_DROP) {
+		I802_DEBUG_INC(local->rx_handlers_drop);
+		if (sta)
+			sta->rx_dropped++;
+	}
+	if (stopped && verdict == TXRX_QUEUED)
+		I802_DEBUG_INC(local->rx_handlers_queued);
+
+	if (verdict == TXRX_DROP)
+		dev_kfree_skb(rx->skb);
+
+	return verdict;
+}
+
+/*
+ * Run @handlers on @rx and free rx->skb if no handler claimed the frame,
+ * i.e. the chain ended with TXRX_CONTINUE.  (Dropped frames are already
+ * freed by __ieee80211_invoke_rx_handlers().)
+ */
+static inline void ieee80211_invoke_rx_handlers(struct ieee80211_local *local,
+						ieee80211_rx_handler *handlers,
+						struct ieee80211_txrx_data *rx,
+						struct sta_info *sta)
+{
+	ieee80211_txrx_result verdict;
+
+	verdict = __ieee80211_invoke_rx_handlers(local, handlers, rx, sta);
+	if (verdict != TXRX_CONTINUE)
+		return;
+
+	dev_kfree_skb(rx->skb);
+}
+
+/*
+ * This is the receive path handler. It is called by a low level driver when an
+ * 802.11 MPDU is received from the hardware.
+ *
+ * The frame first runs through local->rx_pre_handlers.  If it comes from
+ * a known station that is not our AP and not a WDS peer, is unicast, and
+ * no interface is promiscuous, it is processed once by local->rx_handlers
+ * on that station's interface.  Otherwise every sub-interface whose
+ * type-specific filter accepts the frame gets its own copy (the last
+ * accepting interface receives the original skb).
+ *
+ * A Michael MIC error flagged in @status short-circuits all of this and
+ * is handled by ieee80211_rx_michael_mic_report().
+ */
+void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
+		    struct ieee80211_rx_status *status)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta;
+	struct ieee80211_hdr *hdr;
+	struct ieee80211_txrx_data rx;
+	u16 type;
+	int multicast;
+	int radiotap_len = 0;
+
+	/* Hide the radiotap header while parsing; it is pushed back
+	 * before the main handlers run. */
+	if (status->flag & RX_FLAG_RADIOTAP) {
+		radiotap_len = ieee80211_get_radiotap_len(skb);
+		skb_pull(skb, radiotap_len);
+	}
+
+	hdr = (struct ieee80211_hdr *) skb->data;
+	memset(&rx, 0, sizeof(rx));
+	rx.skb = skb;
+	rx.local = local;
+
+	rx.u.rx.status = status;
+	rx.fc = skb->len >= 2 ? le16_to_cpu(hdr->frame_control) : 0;
+	type = rx.fc & IEEE80211_FCTL_FTYPE;
+	if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT)
+		local->dot11ReceivedFragmentCount++;
+	multicast = is_multicast_ether_addr(hdr->addr1);
+
+	/* addr2 is only looked up for frames of at least 16 bytes. */
+	if (skb->len >= 16)
+		sta = rx.sta = sta_info_get(local, hdr->addr2);
+	else
+		sta = rx.sta = NULL;
+
+	if (sta) {
+		rx.dev = sta->dev;
+		rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev);
+	}
+
+	if ((status->flag & RX_FLAG_MMIC_ERROR)) {
+		ieee80211_rx_michael_mic_report(local->mdev, hdr, sta, &rx);
+		goto end;
+	}
+
+	if (unlikely(local->sta_scanning))
+		rx.u.rx.in_scan = 1;
+
+	if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx,
+					   sta) != TXRX_CONTINUE)
+		goto end;
+	skb = rx.skb;
+
+	skb_push(skb, radiotap_len);
+	if (sta && !sta->assoc_ap && !(sta->flags & WLAN_STA_WDS) &&
+	    !local->iff_promiscs && !multicast) {
+		rx.u.rx.ra_match = 1;
+		ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx,
+					     sta);
+	} else {
+		struct ieee80211_sub_if_data *prev = NULL;
+		struct sk_buff *skb_new;
+		u8 *bssid = ieee80211_get_bssid(hdr, skb->len - radiotap_len);
+
+		read_lock(&local->sub_if_lock);
+		list_for_each_entry(sdata, &local->sub_if_list, list) {
+			rx.u.rx.ra_match = 1;
+			switch (sdata->type) {
+			case IEEE80211_IF_TYPE_STA:
+				if (!bssid)
+					continue;
+				if (!ieee80211_bssid_match(bssid,
+							sdata->u.sta.bssid)) {
+					if (!rx.u.rx.in_scan)
+						continue;
+					rx.u.rx.ra_match = 0;
+				} else if (!multicast &&
+					   compare_ether_addr(sdata->dev->dev_addr,
+							      hdr->addr1) != 0) {
+					if (!sdata->promisc)
+						continue;
+					rx.u.rx.ra_match = 0;
+				}
+				break;
+			case IEEE80211_IF_TYPE_IBSS:
+				if (!bssid)
+					continue;
+				if (!ieee80211_bssid_match(bssid,
+							sdata->u.sta.bssid)) {
+					if (!rx.u.rx.in_scan)
+						continue;
+					rx.u.rx.ra_match = 0;
+				} else if (!multicast &&
+					   compare_ether_addr(sdata->dev->dev_addr,
+							      hdr->addr1) != 0) {
+					if (!sdata->promisc)
+						continue;
+					rx.u.rx.ra_match = 0;
+				} else if (!sta)
+					sta = rx.sta =
+						ieee80211_ibss_add_sta(sdata->dev,
+								       skb, bssid,
+								       hdr->addr2);
+				break;
+			case IEEE80211_IF_TYPE_AP:
+				if (!bssid) {
+					if (compare_ether_addr(sdata->dev->dev_addr,
+							       hdr->addr1) != 0)
+						continue;
+				} else if (!ieee80211_bssid_match(bssid,
+							sdata->dev->dev_addr)) {
+					if (!rx.u.rx.in_scan)
+						continue;
+					rx.u.rx.ra_match = 0;
+				}
+				if (sdata->dev == local->mdev &&
+				    !rx.u.rx.in_scan)
+					/* do not receive anything via
+					 * master device when not scanning */
+					continue;
+				break;
+			case IEEE80211_IF_TYPE_WDS:
+				if (bssid ||
+				    (rx.fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
+					continue;
+				if (compare_ether_addr(sdata->u.wds.remote_addr,
+						       hdr->addr2) != 0)
+					continue;
+				break;
+			}
+
+			/* Delivery lags one interface behind so that the
+			 * last accepting interface can take the original
+			 * skb instead of a copy. */
+			if (prev) {
+				skb_new = skb_copy(skb, GFP_ATOMIC);
+				if (!skb_new) {
+					if (net_ratelimit())
+						printk(KERN_DEBUG "%s: failed to copy "
+						       "multicast frame for %s\n",
+						       local->mdev->name, prev->dev->name);
+					continue;
+				}
+				rx.skb = skb_new;
+				rx.dev = prev->dev;
+				rx.sdata = prev;
+				ieee80211_invoke_rx_handlers(local,
+							     local->rx_handlers,
+							     &rx, sta);
+			}
+			prev = sdata;
+		}
+		if (prev) {
+			rx.skb = skb;
+			rx.dev = prev->dev;
+			rx.sdata = prev;
+			ieee80211_invoke_rx_handlers(local, local->rx_handlers,
+						     &rx, sta);
+		} else
+			dev_kfree_skb(skb);
+		read_unlock(&local->sub_if_lock);
+	}
+
+ end:
+	if (sta)
+		sta_info_put(sta);
+}
+EXPORT_SYMBOL(__ieee80211_rx);
+
+/*
+ * TX handler: estimate the channel time this frame (and any extra
+ * fragments) will occupy and add it to the raw channel-use counters of
+ * the hardware, the destination station (if known) and the sending
+ * sub-interface.  Always returns TXRX_CONTINUE.
+ */
+static ieee80211_txrx_result
+ieee80211_tx_h_load_stats(struct ieee80211_txrx_data *tx)
+{
+	struct ieee80211_local *local = tx->local;
+	struct ieee80211_hw_mode *mode = tx->u.tx.mode;
+	struct sk_buff *skb = tx->skb;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	u32 load = 0, hdrtime;
+
+	/* TODO: this could be part of tx_status handling, so that the number
+	 * of retries would be known; TX rate should in that case be stored
+	 * somewhere with the packet */
+
+	/* Estimate total channel use caused by this frame */
+
+	/* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values,
+	 * 1 usec = 1/8 * (1080 / 10) = 13.5 */
+
+	if (mode->mode == MODE_IEEE80211A ||
+	    mode->mode == MODE_ATHEROS_TURBO ||
+	    mode->mode == MODE_ATHEROS_TURBOG ||
+	    (mode->mode == MODE_IEEE80211G &&
+	     tx->u.tx.rate->flags & IEEE80211_RATE_ERP))
+		hdrtime = CHAN_UTIL_HDR_SHORT;
+	else
+		hdrtime = CHAN_UTIL_HDR_LONG;
+
+	/* One header time for the frame, one more for unicast frames
+	 * (presumably the ACK - not stated in this file). */
+	load = hdrtime;
+	if (!is_multicast_ether_addr(hdr->addr1))
+		load += hdrtime;
+
+	if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_RTS_CTS)
+		load += 2 * hdrtime;
+	else if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)
+		load += hdrtime;
+
+	load += skb->len * tx->u.tx.rate->rate_inv;
+
+	if (tx->u.tx.extra_frag) {
+		int i;
+		for (i = 0; i < tx->u.tx.num_extra_frag; i++) {
+			load += 2 * hdrtime;
+			/* Weight by rate_inv, like the first fragment
+			 * above, so faster rates cost less air time. */
+			load += tx->u.tx.extra_frag[i]->len *
+				tx->u.tx.rate->rate_inv;
+		}
+	}
+
+	/* Divide channel_use by 8 to avoid wrapping around the counter */
+	load >>= CHAN_UTIL_SHIFT;
+	local->channel_use_raw += load;
+	if (tx->sta)
+		tx->sta->channel_use_raw += load;
+	tx->sdata->channel_use_raw += load;
+
+	return TXRX_CONTINUE;
+}
+
+
+/*
+ * RX handler: estimate the channel time the received frame occupied and
+ * add it to the raw channel-use counters of the hardware and of the
+ * sending station (if known).  The value is also stored in
+ * rx->u.rx.load for ieee80211_rx_h_if_stats().  Always returns
+ * TXRX_CONTINUE.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_local *local = rx->local;
+	struct sk_buff *skb = rx->skb;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	u32 load = 0, hdrtime;
+	struct ieee80211_rate *rate;
+	struct ieee80211_hw_mode *mode = local->hw.conf.mode;
+	int i;
+
+	/* Estimate total channel use caused by this frame */
+
+	/* Without at least one rate, rates[0] below would not be a valid
+	 * entry, so an empty rate table is skipped as well. */
+	if (unlikely(mode->num_rates <= 0))
+		return TXRX_CONTINUE;
+
+	/* Fall back to the first rate when the reported one is unknown. */
+	rate = &mode->rates[0];
+	for (i = 0; i < mode->num_rates; i++) {
+		if (mode->rates[i].val == rx->u.rx.status->rate) {
+			rate = &mode->rates[i];
+			break;
+		}
+	}
+
+	/* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values,
+	 * 1 usec = 1/8 * (1080 / 10) = 13.5 */
+
+	if (mode->mode == MODE_IEEE80211A ||
+	    mode->mode == MODE_ATHEROS_TURBO ||
+	    mode->mode == MODE_ATHEROS_TURBOG ||
+	    (mode->mode == MODE_IEEE80211G &&
+	     rate->flags & IEEE80211_RATE_ERP))
+		hdrtime = CHAN_UTIL_HDR_SHORT;
+	else
+		hdrtime = CHAN_UTIL_HDR_LONG;
+
+	load = hdrtime;
+	if (!is_multicast_ether_addr(hdr->addr1))
+		load += hdrtime;
+
+	load += skb->len * rate->rate_inv;
+
+	/* Divide channel_use by 8 to avoid wrapping around the counter */
+	load >>= CHAN_UTIL_SHIFT;
+	local->channel_use_raw += load;
+	if (rx->sta)
+		rx->sta->channel_use_raw += load;
+	rx->u.rx.load = load;
+
+	return TXRX_CONTINUE;
+}
+
+/*
+ * RX handler: add the load stored in rx->u.rx.load to the receiving
+ * sub-interface's raw channel-use counter.
+ */
+static ieee80211_txrx_result
+ieee80211_rx_h_if_stats(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_sub_if_data *sub_if = rx->sdata;
+
+	sub_if->channel_use_raw += rx->u.rx.load;
+
+	return TXRX_CONTINUE;
+}
+
+/*
+ * Convert the raw channel-use counters accumulated since the last run
+ * into channel_use values for every station, every sub-interface and
+ * the hardware itself, reset the raw counters, and re-arm
+ * local->stat_timer.  Does nothing (and does not re-arm) when
+ * local->stat_time is 0.
+ *
+ * @data carries the struct ieee80211_local pointer.
+ */
+static void ieee80211_stat_refresh(unsigned long data)
+{
+	struct ieee80211_local *local = (struct ieee80211_local *) data;
+	struct ieee80211_sub_if_data *sub_if;
+	struct sta_info *peer;
+
+	if (local->stat_time == 0)
+		return;
+
+	/* per-station figures, under the station list lock */
+	spin_lock_bh(&local->sta_lock);
+	list_for_each_entry(peer, &local->sta_list, list) {
+		peer->channel_use =
+			(peer->channel_use_raw / local->stat_time) /
+			CHAN_UTIL_PER_10MS;
+		peer->channel_use_raw = 0;
+	}
+	spin_unlock_bh(&local->sta_lock);
+
+	/* per-interface figures, under the sub-interface list lock */
+	read_lock(&local->sub_if_lock);
+	list_for_each_entry(sub_if, &local->sub_if_list, list) {
+		sub_if->channel_use =
+			(sub_if->channel_use_raw / local->stat_time) /
+			CHAN_UTIL_PER_10MS;
+		sub_if->channel_use_raw = 0;
+	}
+	read_unlock(&local->sub_if_lock);
+
+	/* figure for the hardware interface as a whole */
+	local->channel_use =
+		(local->channel_use_raw / local->stat_time) /
+		CHAN_UTIL_PER_10MS;
+	local->channel_use_raw = 0;
+
+	/* Schedule the next refresh. */
+	local->stat_timer.expires = jiffies + HZ * local->stat_time / 100;
+	add_timer(&local->stat_timer);
+}
+
+
+/* This is a version of the rx handler that can be called from hard irq
+ * context. Post the skb on the queue and schedule the tasklet.
+ *
+ * The rx status is copied by value into skb->cb, so the caller's status
+ * structure is not referenced after this function returns. skb->pkt_type is
+ * set to IEEE80211_RX_MSG as a marker; ieee80211_tasklet_handler() switches
+ * on it and resets it to 0 before passing the frame on. */
+void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb,
+ struct ieee80211_rx_status *status)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ /* compile-time guarantee that the status fits into the control buffer */
+ BUILD_BUG_ON(sizeof(struct ieee80211_rx_status) > sizeof(skb->cb));
+
+ skb->dev = local->mdev;
+ /* copy status into skb->cb for use by tasklet */
+ memcpy(skb->cb, status, sizeof(*status));
+ skb->pkt_type = IEEE80211_RX_MSG;
+ skb_queue_tail(&local->skb_queue, skb);
+ tasklet_schedule(&local->tasklet);
+}
+EXPORT_SYMBOL(ieee80211_rx_irqsafe);
+
+/* Deferred variant of ieee80211_tx_status(): saves a heap copy of *status,
+ * stores the pointer to that copy in skb->cb, queues the skb and schedules
+ * the tasklet, which later calls ieee80211_tx_status() and frees the copy.
+ *
+ * Frames that requested a TX status (IEEE80211_TXCTL_REQ_TX_STATUS) go to
+ * local->skb_queue, all others to local->skb_queue_unreliable. When both
+ * queues together hold more than IEEE80211_IRQSAFE_QUEUE_LIMIT entries, the
+ * oldest entries of the unreliable queue are dropped.
+ *
+ * The skb is always consumed. If the status copy cannot be allocated, the
+ * skb is freed and the status is lost. */
+void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
+				 struct sk_buff *skb,
+				 struct ieee80211_tx_status *status)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_tx_status *saved;
+	int tmp;
+
+	skb->dev = local->mdev;
+	saved = kmalloc(sizeof(*saved), GFP_ATOMIC);
+	if (unlikely(!saved)) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "%s: Not enough memory, "
+			       "dropping tx status\n", skb->dev->name);
+		/* should be dev_kfree_skb_irq, but due to this function being
+		 * named _irqsafe instead of just _irq we can't be sure that
+		 * people won't call it from non-irq contexts */
+		dev_kfree_skb_any(skb);
+		return;
+	}
+	memcpy(saved, status, sizeof(*saved));
+	/* copy pointer to saved status into skb->cb for use by tasklet */
+	memcpy(skb->cb, &saved, sizeof(saved));
+
+	skb->pkt_type = IEEE80211_TX_STATUS_MSG;
+	skb_queue_tail(status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS ?
+		       &local->skb_queue : &local->skb_queue_unreliable, skb);
+	tmp = skb_queue_len(&local->skb_queue) +
+		skb_queue_len(&local->skb_queue_unreliable);
+	while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT &&
+	       (skb = skb_dequeue(&local->skb_queue_unreliable))) {
+		/* each dropped entry owns a saved status; release it too */
+		memcpy(&saved, skb->cb, sizeof(saved));
+		kfree(saved);
+		/* same reasoning as above: the calling context is not known,
+		 * so use the _any variant here as well */
+		dev_kfree_skb_any(skb);
+		tmp--;
+		I802_DEBUG_INC(local->tx_status_drop);
+	}
+	tasklet_schedule(&local->tasklet);
+}
+EXPORT_SYMBOL(ieee80211_tx_status_irqsafe);
+
+/* Tasklet body for local->tasklet. Drains local->skb_queue first and
+ * local->skb_queue_unreliable second, dispatching every entry according to
+ * the marker that the irqsafe entry points left in skb->pkt_type. */
+static void ieee80211_tasklet_handler(unsigned long data)
+{
+	struct ieee80211_local *local = (struct ieee80211_local *) data;
+
+	for (;;) {
+		struct sk_buff *skb;
+
+		skb = skb_dequeue(&local->skb_queue);
+		if (!skb)
+			skb = skb_dequeue(&local->skb_queue_unreliable);
+		if (!skb)
+			break;
+
+		if (skb->pkt_type == IEEE80211_RX_MSG) {
+			struct ieee80211_rx_status rx_status;
+
+			/* the rx status was stored by value in skb->cb */
+			memcpy(&rx_status, skb->cb, sizeof(rx_status));
+			/* reset the marker so that the kernel netstack is
+			 * not confused by it */
+			skb->pkt_type = 0;
+			__ieee80211_rx(local_to_hw(local), skb, &rx_status);
+		} else if (skb->pkt_type == IEEE80211_TX_STATUS_MSG) {
+			struct ieee80211_tx_status *tx_status;
+
+			/* skb->cb holds a pointer to the saved tx status */
+			memcpy(&tx_status, skb->cb, sizeof(tx_status));
+			skb->pkt_type = 0;
+			ieee80211_tx_status(local_to_hw(local),
+					    skb, tx_status);
+			kfree(tx_status);
+		} else {
+			/* should never get here! */
+			printk(KERN_ERR "%s: Unknown message type (%d)\n",
+			       local->mdev->name, skb->pkt_type);
+			dev_kfree_skb(skb);
+		}
+	}
+}
+
+
+/* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to
+ * make a prepared TX frame (one that has been given to hw) to look like brand
+ * new IEEE 802.11 frame that is ready to go through TX processing again.
+ * Also, tx_packet_data in cb is restored from tx_control.
+ *
+ * key may be NULL; in that case (and for key algorithms other than WEP, TKIP
+ * and CCMP) only the QoS control removal is performed. The skb is modified
+ * in place. */
+static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
+ struct ieee80211_key *key,
+ struct sk_buff *skb,
+ struct ieee80211_tx_control *control)
+{
+ int hdrlen, iv_len, mic_len;
+ struct ieee80211_tx_packet_data *pkt_data;
+
+ /* rebuild the per-packet data kept in skb->cb from the tx control */
+ pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
+ pkt_data->ifindex = control->ifindex;
+ pkt_data->mgmt_iface = (control->type == IEEE80211_IF_TYPE_MGMT);
+ pkt_data->req_tx_status = !!(control->flags & IEEE80211_TXCTL_REQ_TX_STATUS);
+ pkt_data->do_not_encrypt = !!(control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT);
+ pkt_data->requeue = !!(control->flags & IEEE80211_TXCTL_REQUEUE);
+ pkt_data->queue = control->queue;
+
+ /* header length as the frame currently stands, i.e. taken before the
+ * frame control field is modified below */
+ hdrlen = ieee80211_get_hdrlen_from_skb(skb);
+
+ if (!key)
+ goto no_key;
+
+ switch (key->alg) {
+ case ALG_WEP:
+ iv_len = WEP_IV_LEN;
+ mic_len = WEP_ICV_LEN;
+ break;
+ case ALG_TKIP:
+ iv_len = TKIP_IV_LEN;
+ mic_len = TKIP_ICV_LEN;
+ break;
+ case ALG_CCMP:
+ iv_len = CCMP_HDR_LEN;
+ mic_len = CCMP_MIC_LEN;
+ break;
+ default:
+ goto no_key;
+ }
+
+ /* the trailing ICV/MIC is only trimmed when key->force_sw_encrypt is
+ * set */
+ if (skb->len >= mic_len && key->force_sw_encrypt)
+ skb_trim(skb, skb->len - mic_len);
+ /* move the 802.11 header forward by iv_len bytes so that it overwrites
+ * the bytes following it, then drop the iv_len leading bytes */
+ if (skb->len >= iv_len && skb->len > hdrlen) {
+ memmove(skb->data + iv_len, skb->data, hdrlen);
+ skb_pull(skb, iv_len);
+ }
+
+no_key:
+ {
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ u16 fc = le16_to_cpu(hdr->frame_control);
+ if ((fc & 0x8C) == 0x88) /* QoS Control Field */ {
+ /* clear the QoS subtype bit, then shift the first
+ * hdrlen - 2 header bytes forward by two so the last two
+ * bytes of the old header are overwritten, and drop the
+ * two leading bytes */
+ fc &= ~IEEE80211_STYPE_QOS_DATA;
+ hdr->frame_control = cpu_to_le16(fc);
+ memmove(skb->data + 2, skb->data, hdrlen - 2);
+ skb_pull(skb, 2);
+ }
+ }
+}
+
+
+/* Process the TX status that a driver reports for a frame it was given.
+ *
+ * hw:     the hardware the frame was sent on
+ * skb:    the transmitted frame; it is consumed on every path (freed,
+ *         queued on the station's tx_filtered queue, re-queued with
+ *         dev_queue_xmit(), or replaced by a copy that is handed to
+ *         ieee80211_rx_mgmt())
+ * status: the TX result; must not be NULL (a NULL status is logged and the
+ *         skb is dropped). The structure may be modified here
+ *         (excessive_retries, flags, control.flags).
+ *
+ * Steps: handle frames filtered because the destination station is in power
+ * save, feed the rate control algorithm, update the dot11 counters and, if
+ * a TX status was requested and local->apdev exists, report the result
+ * through ieee80211_rx_mgmt(). */
+void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
+ struct ieee80211_tx_status *status)
+{
+ struct sk_buff *skb2;
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_local *local = hw_to_local(hw);
+ u16 frag, type;
+ u32 msg_type;
+
+ if (!status) {
+ printk(KERN_ERR
+ "%s: ieee80211_tx_status called with NULL status\n",
+ local->mdev->name);
+ dev_kfree_skb(skb);
+ return;
+ }
+
+ /* excessive retries towards a power-saving station are converted into
+ * a "TX filtered" result, which is handled by the next block */
+ if (status->excessive_retries) {
+ struct sta_info *sta;
+ sta = sta_info_get(local, hdr->addr1);
+ if (sta) {
+ if (sta->flags & WLAN_STA_PS) {
+ /* The STA is in power save mode, so assume
+ * that this TX packet failed because of that.
+ */
+ status->excessive_retries = 0;
+ status->flags |= IEEE80211_TX_STATUS_TX_FILTERED;
+ }
+ sta_info_put(sta);
+ }
+ }
+
+ if (status->flags & IEEE80211_TX_STATUS_TX_FILTERED) {
+ struct sta_info *sta;
+ sta = sta_info_get(local, hdr->addr1);
+ if (sta) {
+ sta->tx_filtered_count++;
+
+ /* Clear the TX filter mask for this STA when sending
+ * the next packet. If the STA went to power save mode,
+ * this will happen when it is waking up for the next
+ * time. */
+ sta->clear_dst_mask = 1;
+
+ /* TODO: Is the WLAN_STA_PS flag always set here or is
+ * the race between RX and TX status causing some
+ * packets to be filtered out before 80211.o gets an
+ * update for PS status? This seems to be the case, so
+ * no changes are likely to be needed. */
+ if (sta->flags & WLAN_STA_PS &&
+ skb_queue_len(&sta->tx_filtered) <
+ STA_MAX_TX_BUFFER) {
+ /* station asleep and buffer space left: keep
+ * the frame on the station's tx_filtered queue */
+ ieee80211_remove_tx_extra(local, sta->key,
+ skb,
+ &status->control);
+ skb_queue_tail(&sta->tx_filtered, skb);
+ } else if (!(sta->flags & WLAN_STA_PS) &&
+ !(status->control.flags & IEEE80211_TXCTL_REQUEUE)) {
+ /* Software retry the packet once */
+ status->control.flags |= IEEE80211_TXCTL_REQUEUE;
+ ieee80211_remove_tx_extra(local, sta->key,
+ skb,
+ &status->control);
+ dev_queue_xmit(skb);
+ } else {
+ if (net_ratelimit()) {
+ printk(KERN_DEBUG "%s: dropped TX "
+ "filtered frame queue_len=%d "
+ "PS=%d @%lu\n",
+ local->mdev->name,
+ skb_queue_len(
+ &sta->tx_filtered),
+ !!(sta->flags & WLAN_STA_PS),
+ jiffies);
+ }
+ dev_kfree_skb(skb);
+ }
+ sta_info_put(sta);
+ return;
+ }
+ /* no station entry: fall through to the counters below; note that
+ * rate control is not informed on this path */
+ } else {
+ /* FIXME: STUPID to call this with both local and local->mdev */
+ rate_control_tx_status(local, local->mdev, skb, status);
+ }
+
+ ieee80211_led_tx(local, 0);
+
+ /* SNMP counters
+ * Fragments are passed to low-level drivers as separate skbs, so these
+ * are actually fragments, not frames. Update frame counters only for
+ * the first fragment of the frame. */
+
+ frag = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG;
+ type = le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_FTYPE;
+
+ if (status->flags & IEEE80211_TX_STATUS_ACK) {
+ if (frag == 0) {
+ local->dot11TransmittedFrameCount++;
+ if (is_multicast_ether_addr(hdr->addr1))
+ local->dot11MulticastTransmittedFrameCount++;
+ if (status->retry_count > 0)
+ local->dot11RetryCount++;
+ if (status->retry_count > 1)
+ local->dot11MultipleRetryCount++;
+ }
+
+ /* This counter shall be incremented for an acknowledged MPDU
+ * with an individual address in the address 1 field or an MPDU
+ * with a multicast address in the address 1 field of type Data
+ * or Management. */
+ if (!is_multicast_ether_addr(hdr->addr1) ||
+ type == IEEE80211_FTYPE_DATA ||
+ type == IEEE80211_FTYPE_MGMT)
+ local->dot11TransmittedFragmentCount++;
+ } else {
+ if (frag == 0)
+ local->dot11FailedCount++;
+ }
+
+ /* nothing to report unless a status was requested and apdev exists */
+ if (!(status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS)
+ || unlikely(!local->apdev)) {
+ dev_kfree_skb(skb);
+ return;
+ }
+
+ msg_type = (status->flags & IEEE80211_TX_STATUS_ACK) ?
+ ieee80211_msg_tx_callback_ack : ieee80211_msg_tx_callback_fail;
+
+ /* skb was the original skb used for TX. Make a full copy with
+ * skb_copy(), free the original and hand the copy to
+ * ieee80211_rx_mgmt(). If the copy fails the report is dropped. */
+ skb2 = skb_copy(skb, GFP_ATOMIC);
+ if (!skb2) {
+ dev_kfree_skb(skb);
+ return;
+ }
+ dev_kfree_skb(skb);
+ skb = skb2;
+
+ /* Send frame to hostapd */
+ ieee80211_rx_mgmt(local, skb, NULL, msg_type);
+}
+EXPORT_SYMBOL(ieee80211_tx_status);
+
+/* TODO: implement register/unregister functions for adding TX/RX handlers
+ * into ordered list */
+
+/* rx_pre handlers don't have dev and sdata fields available in
+ * ieee80211_txrx_data.
+ *
+ * NULL-terminated table; ieee80211_alloc_hw() installs it as
+ * local->rx_pre_handlers. */
+static ieee80211_rx_handler ieee80211_rx_pre_handlers[] =
+{
+ ieee80211_rx_h_parse_qos,
+ ieee80211_rx_h_load_stats,
+ NULL
+};
+
+/* NULL-terminated table of RX handlers; ieee80211_alloc_hw() installs it as
+ * local->rx_handlers. The position of entries matters, see the ordering
+ * note inside the table. */
+static ieee80211_rx_handler ieee80211_rx_handlers[] =
+{
+ ieee80211_rx_h_if_stats,
+ ieee80211_rx_h_monitor,
+ ieee80211_rx_h_passive_scan,
+ ieee80211_rx_h_check,
+ ieee80211_rx_h_sta_process,
+ ieee80211_rx_h_ccmp_decrypt,
+ ieee80211_rx_h_tkip_decrypt,
+ ieee80211_rx_h_wep_weak_iv_detection,
+ ieee80211_rx_h_wep_decrypt,
+ ieee80211_rx_h_defragment,
+ ieee80211_rx_h_ps_poll,
+ ieee80211_rx_h_michael_mic_verify,
+ /* this must be after decryption - so header is counted in MPDU mic
+ * must be before pae and data, so QOS_DATA format frames
+ * are not passed to user space by these functions
+ */
+ ieee80211_rx_h_remove_qos_control,
+ ieee80211_rx_h_802_1x_pae,
+ ieee80211_rx_h_drop_unencrypted,
+ ieee80211_rx_h_data,
+ ieee80211_rx_h_mgmt,
+ NULL
+};
+
+/* NULL-terminated table of TX handlers; ieee80211_alloc_hw() installs it as
+ * local->tx_handlers. */
+static ieee80211_tx_handler ieee80211_tx_handlers[] =
+{
+ ieee80211_tx_h_check_assoc,
+ ieee80211_tx_h_sequence,
+ ieee80211_tx_h_ps_buf,
+ ieee80211_tx_h_select_key,
+ ieee80211_tx_h_michael_mic_add,
+ ieee80211_tx_h_fragment,
+ ieee80211_tx_h_tkip_encrypt,
+ ieee80211_tx_h_ccmp_encrypt,
+ ieee80211_tx_h_wep_encrypt,
+ ieee80211_tx_h_rate_ctrl,
+ ieee80211_tx_h_misc,
+ ieee80211_tx_h_load_stats,
+ NULL
+};
+
+
+/* Point the WDS link of dev at a new peer address.
+ *
+ * Returns 0 when the address is unchanged or the switch succeeded, and
+ * -ENOMEM when no STA entry could be added for the new peer (the link is
+ * left untouched in that case). A missing STA entry for the old peer is
+ * only logged. */
+int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct sta_info *new_peer, *old_peer;
+
+	if (!compare_ether_addr(remote_addr, sdata->u.wds.remote_addr))
+		return 0;
+
+	/* the new peer gets its STA entry first */
+	new_peer = sta_info_add(local, dev, remote_addr, GFP_KERNEL);
+	if (!new_peer)
+		return -ENOMEM;
+	sta_info_put(new_peer);
+
+	/* then the entry of the previous peer is released */
+	old_peer = sta_info_get(local, sdata->u.wds.remote_addr);
+	if (!old_peer) {
+		printk(KERN_DEBUG "%s: could not find STA entry for WDS link "
+		       "peer " MAC_FMT "\n",
+		       dev->name, MAC_ARG(sdata->u.wds.remote_addr));
+	} else {
+		sta_info_put(old_peer);
+		sta_info_free(old_peer, 0);
+	}
+
+	/* finally record the new address in the WDS link data */
+	memcpy(&sdata->u.wds.remote_addr, remote_addr, ETH_ALEN);
+
+	return 0;
+}
+
+/* Netdevice setup callback for ordinary virtual interfaces: Ethernet
+ * defaults first, then the mac80211 operations.
+ * Must not be called for mdev and apdev. */
+void ieee80211_if_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	/* lifecycle */
+	dev->open = ieee80211_open;
+	dev->stop = ieee80211_stop;
+	dev->uninit = ieee80211_if_reinit;
+	dev->destructor = ieee80211_if_free;
+
+	/* data path and configuration */
+	dev->hard_start_xmit = ieee80211_subif_start_xmit;
+	dev->set_multicast_list = ieee80211_set_multicast_list;
+	dev->change_mtu = ieee80211_change_mtu;
+	dev->get_stats = ieee80211_get_stats;
+	dev->wireless_handlers = &ieee80211_iw_handler_def;
+}
+
+/* Netdevice setup callback for the management interface: Ethernet defaults
+ * first, then the management-specific operations and the
+ * ARPHRD_IEEE80211_PRISM link type. */
+void ieee80211_if_mgmt_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	/* link type and header parsing */
+	dev->type = ARPHRD_IEEE80211_PRISM;
+	dev->hard_header_parse = header_parse_80211;
+
+	/* lifecycle */
+	dev->open = ieee80211_mgmt_open;
+	dev->stop = ieee80211_mgmt_stop;
+	dev->uninit = ieee80211_if_reinit;
+	dev->destructor = ieee80211_if_free;
+
+	/* data path and configuration */
+	dev->hard_start_xmit = ieee80211_mgmt_start_xmit;
+	dev->change_mtu = ieee80211_change_mtu_apdev;
+	dev->get_stats = ieee80211_get_stats;
+}
+
+/* Select the rate control algorithm called name for this device.
+ *
+ * Must be called with the RTNL held. Returns -EBUSY while any interface is
+ * open or the master/management device is running, -ENOENT when
+ * rate_control_alloc() fails, and 0 on success. When an algorithm was
+ * already installed, its reference is dropped and all STA entries are
+ * flushed. */
+int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
+				 const char *name)
+{
+	struct rate_control_ref *new_ref, *prev_ref;
+
+	ASSERT_RTNL();
+
+	if (local->open_count || netif_running(local->mdev))
+		return -EBUSY;
+	if (local->apdev && netif_running(local->apdev))
+		return -EBUSY;
+
+	new_ref = rate_control_alloc(name, local);
+	if (!new_ref) {
+		printk(KERN_WARNING "%s: Failed to select rate control "
+		       "algorithm\n", local->mdev->name);
+		return -ENOENT;
+	}
+
+	/* install the new reference before releasing the previous one */
+	prev_ref = local->rate_ctrl;
+	local->rate_ctrl = new_ref;
+	if (prev_ref) {
+		rate_control_put(prev_ref);
+		sta_info_flush(local, NULL);
+	}
+
+	printk(KERN_DEBUG "%s: Selected rate control "
+	       "algorithm '%s'\n", local->mdev->name,
+	       new_ref->ops->name);
+
+	return 0;
+}
+
+/* Detach the rate control algorithm from local: local->rate_ctrl is cleared
+ * first, then the detached reference is put. */
+static void rate_control_deinitialize(struct ieee80211_local *local)
+{
+	struct rate_control_ref *detached = local->rate_ctrl;
+
+	local->rate_ctrl = NULL;
+	rate_control_put(detached);
+}
+
+/* Allocate and pre-initialize a hardware instance.
+ *
+ * priv_data_len: size of the driver's private area, reachable afterwards
+ *                through hw->priv
+ * ops:           driver callbacks, stored in local->ops
+ *
+ * Allocates the wiphy (with ieee80211_local and the driver data in its
+ * private area) and the "wmaster%d" master netdevice, and fills in defaults.
+ * Nothing is registered here. Returns the new hw, or NULL when either
+ * allocation fails. */
+struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
+ const struct ieee80211_ops *ops)
+{
+ struct net_device *mdev;
+ struct ieee80211_local *local;
+ struct ieee80211_sub_if_data *sdata;
+ int priv_size;
+ struct wiphy *wiphy;
+
+ /* Ensure 32-byte alignment of our private data and hw private data.
+ * We use the wiphy priv data for both our ieee80211_local and for
+ * the driver's private data
+ *
+ * In memory it'll be like this:
+ *
+ * +-------------------------+
+ * | struct wiphy |
+ * +-------------------------+
+ * | struct ieee80211_local |
+ * +-------------------------+
+ * | driver's private data |
+ * +-------------------------+
+ *
+ */
+ priv_size = ((sizeof(struct ieee80211_local) +
+ NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST) +
+ priv_data_len;
+
+ wiphy = wiphy_new(&mac80211_config_ops, priv_size);
+
+ if (!wiphy)
+ return NULL;
+
+ wiphy->privid = mac80211_wiphy_privid;
+
+ local = wiphy_priv(wiphy);
+ local->hw.wiphy = wiphy;
+
+ /* driver data starts right after the aligned ieee80211_local */
+ local->hw.priv = (char *)local +
+ ((sizeof(struct ieee80211_local) +
+ NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+
+ local->ops = ops;
+
+ /* for now, mdev needs sub_if_data :/ */
+ mdev = alloc_netdev(sizeof(struct ieee80211_sub_if_data),
+ "wmaster%d", ether_setup);
+ if (!mdev) {
+ wiphy_free(wiphy);
+ return NULL;
+ }
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(mdev);
+ mdev->ieee80211_ptr = &sdata->wdev;
+ sdata->wdev.wiphy = wiphy;
+
+ local->hw.queues = 1; /* default */
+
+ local->mdev = mdev;
+ local->rx_pre_handlers = ieee80211_rx_pre_handlers;
+ local->rx_handlers = ieee80211_rx_handlers;
+ local->tx_handlers = ieee80211_tx_handlers;
+
+ local->bridge_packets = 1;
+
+ local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
+ local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
+ local->short_retry_limit = 7;
+ local->long_retry_limit = 4;
+ local->hw.conf.radio_enabled = 1;
+ local->rate_ctrl_num_up = RATE_CONTROL_NUM_UP;
+ local->rate_ctrl_num_down = RATE_CONTROL_NUM_DOWN;
+
+ /* all bits set */
+ local->enabled_modes = (unsigned int) -1;
+
+ INIT_LIST_HEAD(&local->modes_list);
+
+ rwlock_init(&local->sub_if_lock);
+ INIT_LIST_HEAD(&local->sub_if_list);
+
+ INIT_DELAYED_WORK(&local->scan_work, ieee80211_sta_scan_work);
+ /* the statistics timer is only prepared here, not started */
+ init_timer(&local->stat_timer);
+ local->stat_timer.function = ieee80211_stat_refresh;
+ local->stat_timer.data = (unsigned long) local;
+ ieee80211_rx_bss_list_init(mdev);
+
+ sta_info_init(local);
+
+ mdev->hard_start_xmit = ieee80211_master_start_xmit;
+ mdev->open = ieee80211_master_open;
+ mdev->stop = ieee80211_master_stop;
+ mdev->type = ARPHRD_IEEE80211;
+ mdev->hard_header_parse = header_parse_80211;
+
+ /* the master device's sub_if_data is set up as an AP type entry and
+ * becomes the first member of sub_if_list */
+ sdata->type = IEEE80211_IF_TYPE_AP;
+ sdata->dev = mdev;
+ sdata->local = local;
+ sdata->u.ap.force_unicast_rateidx = -1;
+ sdata->u.ap.max_ratectrl_rateidx = -1;
+ ieee80211_if_sdata_init(sdata);
+ list_add_tail(&sdata->list, &local->sub_if_list);
+
+ /* both tasklets are created disabled; nothing in this function
+ * enables them */
+ tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending,
+ (unsigned long)local);
+ tasklet_disable(&local->tx_pending_tasklet);
+
+ tasklet_init(&local->tasklet,
+ ieee80211_tasklet_handler,
+ (unsigned long) local);
+ tasklet_disable(&local->tasklet);
+
+ skb_queue_head_init(&local->skb_queue);
+ skb_queue_head_init(&local->skb_queue_unreliable);
+
+ return local_to_hw(local);
+}
+EXPORT_SYMBOL(ieee80211_alloc_hw);
+
+/* Register a hardware instance obtained from ieee80211_alloc_hw().
+ *
+ * Registers the wiphy, creates the driver workqueue, starts sta_info,
+ * registers the master netdevice, selects the default rate control
+ * algorithm, initializes WEP, installs the qdisc and adds one default
+ * "wlan%d" STA interface.
+ *
+ * Returns 0 on success or a negative error code; on error every step that
+ * already succeeded is undone through the fail_* labels. Failure to add the
+ * default STA interface is only logged and does not fail the registration. */
+int ieee80211_register_hw(struct ieee80211_hw *hw)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ const char *name;
+ int result;
+
+ result = wiphy_register(local->hw.wiphy);
+ if (result < 0)
+ return result;
+
+ /* the workqueue is named after the driver of the wiphy's device */
+ name = wiphy_dev(local->hw.wiphy)->driver->name;
+ local->hw.workqueue = create_singlethread_workqueue(name);
+ if (!local->hw.workqueue) {
+ result = -ENOMEM;
+ goto fail_workqueue;
+ }
+
+ debugfs_hw_add(local);
+
+ local->hw.conf.beacon_int = 1000;
+
+ /* a zero max_rssi/max_signal/max_noise marks the corresponding
+ * wireless statistics field as invalid */
+ local->wstats_flags |= local->hw.max_rssi ?
+ IW_QUAL_LEVEL_UPDATED : IW_QUAL_LEVEL_INVALID;
+ local->wstats_flags |= local->hw.max_signal ?
+ IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID;
+ local->wstats_flags |= local->hw.max_noise ?
+ IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID;
+ if (local->hw.max_rssi < 0 || local->hw.max_noise < 0)
+ local->wstats_flags |= IW_QUAL_DBM;
+
+ result = sta_info_start(local);
+ if (result < 0)
+ goto fail_sta_info;
+
+ /* the RTNL is held from here until the success path unlocks it or
+ * the fail_dev label does */
+ rtnl_lock();
+ result = dev_alloc_name(local->mdev, local->mdev->name);
+ if (result < 0)
+ goto fail_dev;
+
+ memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
+ SET_NETDEV_DEV(local->mdev, wiphy_dev(local->hw.wiphy));
+
+ result = register_netdevice(local->mdev);
+ if (result < 0)
+ goto fail_dev;
+
+ ieee80211_debugfs_add_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev));
+
+ result = ieee80211_init_rate_ctrl_alg(local, NULL);
+ if (result < 0) {
+ printk(KERN_DEBUG "%s: Failed to initialize rate control "
+ "algorithm\n", local->mdev->name);
+ goto fail_rate;
+ }
+
+ result = ieee80211_wep_init(local);
+
+ if (result < 0) {
+ printk(KERN_DEBUG "%s: Failed to initialize wep\n",
+ local->mdev->name);
+ goto fail_wep;
+ }
+
+ ieee80211_install_qdisc(local->mdev);
+
+ /* add one default STA interface */
+ result = ieee80211_if_add(local->mdev, "wlan%d", NULL,
+ IEEE80211_IF_TYPE_STA);
+ if (result)
+ printk(KERN_WARNING "%s: Failed to add default virtual iface\n",
+ local->mdev->name);
+
+ local->reg_state = IEEE80211_DEV_REGISTERED;
+ rtnl_unlock();
+
+ ieee80211_led_init(local);
+
+ return 0;
+
+ /* error unwinding: each label undoes the steps completed before the
+ * failing one, in reverse order */
+fail_wep:
+ rate_control_deinitialize(local);
+fail_rate:
+ ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev));
+ unregister_netdevice(local->mdev);
+fail_dev:
+ rtnl_unlock();
+ sta_info_stop(local);
+fail_sta_info:
+ debugfs_hw_del(local);
+ destroy_workqueue(local->hw.workqueue);
+fail_workqueue:
+ wiphy_unregister(local->hw.wiphy);
+ return result;
+}
+EXPORT_SYMBOL(ieee80211_register_hw);
+
+/* Add a hardware mode to the device.
+ *
+ * The mode is appended to local->modes_list, its bit is set in
+ * local->hw_modes and rate_inv is derived for each of its rates
+ * (CHAN_UTIL_RATE_LCM / rate). The first mode registered also becomes the
+ * operating and scanning mode, with its first channel selected. Unless the
+ * hardware sets IEEE80211_HW_DEFAULT_REG_DOMAIN_CONFIGURED,
+ * ieee80211_init_client() is called for the master device.
+ *
+ * Always returns 0. */
+int ieee80211_register_hwmode(struct ieee80211_hw *hw,
+			      struct ieee80211_hw_mode *mode)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	int idx;
+
+	INIT_LIST_HEAD(&mode->list);
+	list_add_tail(&mode->list, &local->modes_list);
+
+	local->hw_modes |= (1 << mode->mode);
+
+	for (idx = 0; idx < mode->num_rates; idx++)
+		mode->rates[idx].rate_inv =
+			CHAN_UTIL_RATE_LCM / mode->rates[idx].rate;
+
+	ieee80211_prepare_rates(local, mode);
+
+	if (!local->oper_hw_mode) {
+		/* no mode selected yet: default to this one */
+		local->hw.conf.phymode = mode->mode;
+
+		local->scan_hw_mode = mode;
+		local->oper_hw_mode = mode;
+
+		local->scan_channel = &mode->channels[0];
+		local->oper_channel = &mode->channels[0];
+
+		local->hw.conf.mode = local->oper_hw_mode;
+		local->hw.conf.chan = local->oper_channel;
+	}
+
+	if (!(hw->flags & IEEE80211_HW_DEFAULT_REG_DOMAIN_CONFIGURED))
+		ieee80211_init_client(local->mdev);
+
+	return 0;
+}
+EXPORT_SYMBOL(ieee80211_register_hwmode);
+
+/* Free every skb still sitting on one of the irqsafe queues.
+ *
+ * Entries marked IEEE80211_TX_STATUS_MSG carry, in skb->cb, a pointer to a
+ * kmalloc()ed struct ieee80211_tx_status stored by
+ * ieee80211_tx_status_irqsafe(). A plain skb_queue_purge() would free the
+ * skb but leak that allocation, so it is released here explicitly. */
+static void ieee80211_purge_irqsafe_queue(struct sk_buff_head *queue)
+{
+	struct ieee80211_tx_status *saved;
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(queue))) {
+		if (skb->pkt_type == IEEE80211_TX_STATUS_MSG) {
+			memcpy(&saved, skb->cb, sizeof(saved));
+			kfree(saved);
+		}
+		dev_kfree_skb(skb);
+	}
+}
+
+/* Undo ieee80211_register_hw(): stop the tasklets, remove the management
+ * interface and all sub-interfaces under the RTNL, stop the statistics
+ * timer, and release the BSS list, pending TX frames, STA info, rate
+ * control, debugfs entries, per-mode rate tables, queued skbs, the
+ * workqueue, the wiphy registration, WEP state and LEDs.
+ *
+ * The device must be in state IEEE80211_DEV_REGISTERED (BUG otherwise). */
+void ieee80211_unregister_hw(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_sub_if_data *sdata, *tmp;
+	struct list_head tmp_list;
+	int i;
+
+	tasklet_kill(&local->tx_pending_tasklet);
+	tasklet_kill(&local->tasklet);
+
+	rtnl_lock();
+
+	BUG_ON(local->reg_state != IEEE80211_DEV_REGISTERED);
+
+	local->reg_state = IEEE80211_DEV_UNREGISTERED;
+	if (local->apdev)
+		ieee80211_if_del_mgmt(local);
+
+	/* detach the whole interface list under the lock, then delete the
+	 * entries from the private copy without holding it */
+	write_lock_bh(&local->sub_if_lock);
+	list_replace_init(&local->sub_if_list, &tmp_list);
+	write_unlock_bh(&local->sub_if_lock);
+
+	list_for_each_entry_safe(sdata, tmp, &tmp_list, list)
+		__ieee80211_if_del(local, sdata);
+
+	rtnl_unlock();
+
+	if (local->stat_time)
+		del_timer_sync(&local->stat_timer);
+
+	ieee80211_rx_bss_list_deinit(local->mdev);
+	ieee80211_clear_tx_pending(local);
+	sta_info_stop(local);
+	rate_control_deinitialize(local);
+	debugfs_hw_del(local);
+
+	for (i = 0; i < NUM_IEEE80211_MODES; i++) {
+		kfree(local->supp_rates[i]);
+		kfree(local->basic_rates[i]);
+	}
+
+	if (skb_queue_len(&local->skb_queue)
+	    || skb_queue_len(&local->skb_queue_unreliable))
+		printk(KERN_WARNING "%s: skb_queue not empty\n",
+		       local->mdev->name);
+	/* not skb_queue_purge(): queued TX status entries own a separately
+	 * allocated status structure that must be freed as well */
+	ieee80211_purge_irqsafe_queue(&local->skb_queue);
+	ieee80211_purge_irqsafe_queue(&local->skb_queue_unreliable);
+
+	destroy_workqueue(local->hw.workqueue);
+	wiphy_unregister(local->hw.wiphy);
+	ieee80211_wep_free(local);
+	ieee80211_led_exit(local);
+}
+EXPORT_SYMBOL(ieee80211_unregister_hw);
+
+/* Release the memory of a hardware instance: the master netdevice first,
+ * then the wiphy. */
+void ieee80211_free_hw(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *priv = hw_to_local(hw);
+
+	ieee80211_if_free(priv->mdev);
+	wiphy_free(priv->hw.wiphy);
+}
+EXPORT_SYMBOL(ieee80211_free_hw);
+
+/* Restart a TX queue that was stopped with ieee80211_stop_queue().
+ *
+ * Nothing happens unless the queue's XOFF bit was set. Otherwise the bit is
+ * cleared and either the tx_pending tasklet is scheduled (frames pending on
+ * this queue), or the master device is rescheduled (qdisc installed), or,
+ * for queue 0 only, the netdevice queue is woken. */
+void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	if (!test_and_clear_bit(IEEE80211_LINK_STATE_XOFF,
+				&local->state[queue]))
+		return;
+
+	if (test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue])) {
+		tasklet_schedule(&local->tx_pending_tasklet);
+		return;
+	}
+
+	if (ieee80211_qdisc_installed(local->mdev)) {
+		__netif_schedule(local->mdev);
+		return;
+	}
+
+	if (queue == 0)
+		netif_wake_queue(local->mdev);
+}
+EXPORT_SYMBOL(ieee80211_wake_queue);
+
+/* Stop a TX queue: set its XOFF bit and, when no qdisc is installed and the
+ * queue is queue 0, stop the master netdevice queue as well. */
+void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	if (!ieee80211_qdisc_installed(local->mdev)) {
+		if (queue == 0)
+			netif_stop_queue(local->mdev);
+	}
+
+	set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]);
+}
+EXPORT_SYMBOL(ieee80211_stop_queue);
+
+/* Clear the XOFF bit of every hardware queue and, when no qdisc is
+ * installed, start the master netdevice queue. */
+void ieee80211_start_queues(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	int q = 0;
+
+	while (q < local->hw.queues) {
+		clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[q]);
+		q++;
+	}
+
+	if (!ieee80211_qdisc_installed(local->mdev))
+		netif_start_queue(local->mdev);
+}
+EXPORT_SYMBOL(ieee80211_start_queues);
+
+/* Stop every TX queue of the hardware, one ieee80211_stop_queue() call per
+ * queue index below hw->queues. */
+void ieee80211_stop_queues(struct ieee80211_hw *hw)
+{
+	int q = 0;
+
+	while (q < hw->queues) {
+		ieee80211_stop_queue(hw, q);
+		q++;
+	}
+}
+EXPORT_SYMBOL(ieee80211_stop_queues);
+
+/* Wake every TX queue of the hardware, one ieee80211_wake_queue() call per
+ * queue index below hw->queues. */
+void ieee80211_wake_queues(struct ieee80211_hw *hw)
+{
+	int q = 0;
+
+	while (q < hw->queues) {
+		ieee80211_wake_queue(hw, q);
+		q++;
+	}
+}
+EXPORT_SYMBOL(ieee80211_wake_queues);
+
+/* Return the net_device_stats embedded in the sub-interface data that
+ * belongs to dev. */
+struct net_device_stats *ieee80211_dev_stats(struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	return &sdata->stats;
+}
+
+/* Module init: registers WME and then sets up the per-netdev debugfs
+ * support. Returns 0, or the error from ieee80211_wme_register(). */
+static int __init ieee80211_init(void)
+{
+	struct sk_buff *skb;
+	int err;
+
+	/* the per-packet TX data has to fit into the skb control buffer */
+	BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb));
+
+	err = ieee80211_wme_register();
+	if (!err) {
+		ieee80211_debugfs_netdev_init();
+		return 0;
+	}
+
+	printk(KERN_DEBUG "ieee80211_init: failed to "
+	       "initialize WME (err=%d)\n", err);
+	return err;
+}
+
+
+/* Module exit: unregister WME, then tear down the per-netdev debugfs
+ * support. */
+static void __exit ieee80211_exit(void)
+{
+	ieee80211_wme_unregister();
+
+	ieee80211_debugfs_netdev_exit();
+}
+
+
+module_init(ieee80211_init);
+module_exit(ieee80211_exit);
+
+MODULE_DESCRIPTION("IEEE 802.11 subsystem");
+MODULE_LICENSE("GPL");
diff --git a/net/mac80211/ieee80211_cfg.c b/net/mac80211/ieee80211_cfg.c
new file mode 100644
index 000000000000..509096edb324
--- /dev/null
+++ b/net/mac80211/ieee80211_cfg.c
@@ -0,0 +1,66 @@
+/*
+ * mac80211 configuration hooks for cfg80211
+ *
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This file is GPLv2 as found in COPYING.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/nl80211.h>
+#include <linux/rtnetlink.h>
+#include <linux/string.h>
+#include <net/cfg80211.h>
+#include "ieee80211_i.h"
+#include "ieee80211_cfg.h"
+
+/* cfg80211 add_virtual_intf hook: translate the nl80211 interface type into
+ * the mac80211 one and create the interface on the master device.
+ *
+ * Returns -ENODEV when the device is not in the registered state, -EINVAL
+ * for interface types that are not handled here, and otherwise whatever
+ * ieee80211_if_add() returns. */
+static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
+			       unsigned int type)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	int itype;
+
+	if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED))
+		return -ENODEV;
+
+	switch (type) {
+	case NL80211_IFTYPE_UNSPECIFIED:	/* unspecified means STA */
+	case NL80211_IFTYPE_STATION:
+		itype = IEEE80211_IF_TYPE_STA;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		itype = IEEE80211_IF_TYPE_IBSS;
+		break;
+	case NL80211_IFTYPE_MONITOR:
+		itype = IEEE80211_IF_TYPE_MNTR;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return ieee80211_if_add(local->mdev, name, NULL, itype);
+}
+
+/* cfg80211 del_virtual_intf hook: remove the interface with the given
+ * ifindex from the master device.
+ *
+ * Returns -ENODEV when the device is not in the registered state, 0 when no
+ * netdevice has that ifindex, and otherwise whatever ieee80211_if_remove()
+ * returns. */
+static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct net_device *dev;
+	char name[IFNAMSIZ];
+
+	if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED))
+		return -ENODEV;
+
+	dev = dev_get_by_index(ifindex);
+	if (!dev)
+		return 0;
+
+	/* Take a private copy of the name before the reference is dropped:
+	 * once dev_put() has run, nothing in this function keeps dev (and
+	 * therefore dev->name) alive any more. */
+	strlcpy(name, dev->name, sizeof(name));
+	dev_put(dev);
+
+	return ieee80211_if_remove(local->mdev, name, -1);
+}
+
+/* cfg80211 operations implemented by mac80211; only adding and deleting
+ * virtual interfaces is provided here. */
+struct cfg80211_ops mac80211_config_ops = {
+ .add_virtual_intf = ieee80211_add_iface,
+ .del_virtual_intf = ieee80211_del_iface,
+};
diff --git a/net/mac80211/ieee80211_cfg.h b/net/mac80211/ieee80211_cfg.h
new file mode 100644
index 000000000000..85ed2c924878
--- /dev/null
+++ b/net/mac80211/ieee80211_cfg.h
@@ -0,0 +1,9 @@
+/*
+ * mac80211 configuration hooks for cfg80211
+ */
+#ifndef __IEEE80211_CFG_H
+#define __IEEE80211_CFG_H
+
+extern struct cfg80211_ops mac80211_config_ops;
+
+#endif /* __IEEE80211_CFG_H */
diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h
new file mode 100644
index 000000000000..b9a73e7f5f75
--- /dev/null
+++ b/net/mac80211/ieee80211_common.h
@@ -0,0 +1,98 @@
+/*
+ * IEEE 802.11 driver (80211.o) -- hostapd interface
+ * Copyright 2002-2004, Instant802 Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef IEEE80211_COMMON_H
+#define IEEE80211_COMMON_H
+
+#include <linux/types.h>
+
+/*
+ * This is common header information with user space. It is used on all
+ * frames sent to wlan#ap interface.
+ */
+
+#define IEEE80211_FI_VERSION 0x80211001
+
+/* Frame information header shared with user space. All fields are
+ * big-endian and the structure is packed, so it has no padding. */
+struct ieee80211_frame_info {
+ __be32 version;
+ __be32 length;
+ __be64 mactime;
+ __be64 hosttime;
+ __be32 phytype;
+ __be32 channel;
+ __be32 datarate;
+ __be32 antenna;
+ __be32 priority;
+ __be32 ssi_type;
+ __be32 ssi_signal;
+ __be32 ssi_noise;
+ __be32 preamble;
+ __be32 encoding;
+
+ /* Note: this structure is otherwise identical to capture format used
+ * in linux-wlan-ng, but this additional field is used to provide meta
+ * data about the frame to hostapd. This was the easiest method for
+ * providing this information, but this might change in the future. */
+ __be32 msg_type;
+} __attribute__ ((packed));
+
+
+/* Message types; presumably the values carried in the msg_type field of
+ * struct ieee80211_frame_info -- confirm against the code filling it in.
+ * The numbering is not contiguous: 6 and 10 are not assigned here. */
+enum ieee80211_msg_type {
+ ieee80211_msg_normal = 0,
+ ieee80211_msg_tx_callback_ack = 1,
+ ieee80211_msg_tx_callback_fail = 2,
+ ieee80211_msg_passive_scan = 3,
+ ieee80211_msg_wep_frame_unknown_key = 4,
+ ieee80211_msg_michael_mic_failure = 5,
+ /* hole at 6, was monitor but never sent to userspace */
+ ieee80211_msg_sta_not_assoc = 7,
+ ieee80211_msg_set_aid_for_sta = 8 /* used by Intersil MVC driver */,
+ ieee80211_msg_key_threshold_notification = 9,
+ ieee80211_msg_radar = 11,
+};
+
+/* Station address / AID pair; presumably the payload that goes with the
+ * ieee80211_msg_set_aid_for_sta message type -- TODO confirm. */
+struct ieee80211_msg_set_aid_for_sta {
+ char sta_address[ETH_ALEN];
+ u16 aid;
+};
+
+/* Key notification data; presumably the payload that goes with
+ * ieee80211_msg_key_threshold_notification -- TODO confirm. */
+struct ieee80211_msg_key_notification {
+ int tx_rx_count;
+ char ifname[IFNAMSIZ];
+ u8 addr[ETH_ALEN]; /* ff:ff:ff:ff:ff:ff for broadcast keys */
+};
+
+
+/* PHY type identifiers; presumably the values used for the phytype field of
+ * struct ieee80211_frame_info -- TODO confirm. The two turbo entries use
+ * 255 and 256, apart from the contiguous 1-8 range. */
+enum ieee80211_phytype {
+ ieee80211_phytype_fhss_dot11_97 = 1,
+ ieee80211_phytype_dsss_dot11_97 = 2,
+ ieee80211_phytype_irbaseband = 3,
+ ieee80211_phytype_dsss_dot11_b = 4,
+ ieee80211_phytype_pbcc_dot11_b = 5,
+ ieee80211_phytype_ofdm_dot11_g = 6,
+ ieee80211_phytype_pbcc_dot11_g = 7,
+ ieee80211_phytype_ofdm_dot11_a = 8,
+ ieee80211_phytype_dsss_dot11_turbog = 255,
+ ieee80211_phytype_dsss_dot11_turbo = 256,
+};
+
+/* How signal strength values are expressed; presumably the values used for
+ * the ssi_type field of struct ieee80211_frame_info -- TODO confirm. */
+enum ieee80211_ssi_type {
+ ieee80211_ssi_none = 0,
+ ieee80211_ssi_norm = 1, /* normalized, 0-1000 */
+ ieee80211_ssi_dbm = 2,
+ ieee80211_ssi_raw = 3, /* raw SSI */
+};
+
+/* Radar information; presumably the payload that goes with the
+ * ieee80211_msg_radar message type -- TODO confirm. */
+struct ieee80211_radar_info {
+ int channel;
+ int radar;
+ int radar_type;
+};
+
+#endif /* IEEE80211_COMMON_H */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
new file mode 100644
index 000000000000..af4d14d0b969
--- /dev/null
+++ b/net/mac80211/ieee80211_i.h
@@ -0,0 +1,798 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef IEEE80211_I_H
+#define IEEE80211_I_H
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/if_ether.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <net/wireless.h>
+#include "ieee80211_key.h"
+#include "sta_info.h"
+
+/* ieee80211.o internal definitions, etc. These are not included into
+ * low-level drivers. */
+
+#ifndef ETH_P_PAE
+#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */
+#endif /* ETH_P_PAE */
+
+#define WLAN_FC_DATA_PRESENT(fc) (((fc) & 0x4c) == 0x08)
+
+struct ieee80211_local;
+
+#define BIT(x) (1 << (x))
+
+#define IEEE80211_ALIGN32_PAD(a) ((4 - ((a) & 3)) & 3)
+
+/* Maximum number of broadcast/multicast frames to buffer when some of the
+ * associated stations are using power saving. */
+#define AP_MAX_BC_BUFFER 128
+
+/* Maximum number of frames buffered to all STAs, including multicast frames.
+ * Note: increasing this limit increases the potential memory requirement. Each
+ * frame can be up to about 2 kB long. */
+#define TOTAL_MAX_TX_BUFFER 512
+
+/* Required encryption head and tailroom */
+#define IEEE80211_ENCRYPT_HEADROOM 8
+#define IEEE80211_ENCRYPT_TAILROOM 12
+
+/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
+ * reception of at least three fragmented frames. This limit can be increased
+ * by changing this define, at the cost of slower frame reassembly and
+ * increased memory use (about 2 kB of RAM per entry). */
+#define IEEE80211_FRAGMENT_MAX 4
+
+/*
+ * One slot of the per-interface fragment table used for host-based
+ * reassembly (see the fragments[] array in struct ieee80211_sub_if_data).
+ * The fragments collected so far are kept on skb_list.
+ */
+struct ieee80211_fragment_entry {
+ unsigned long first_frag_time;
+ unsigned int seq;
+ unsigned int rx_queue;
+ unsigned int last_frag;
+ unsigned int extra_len;
+ struct sk_buff_head skb_list;
+ int ccmp; /* Whether fragments were encrypted with CCMP */
+ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+};
+
+
+/*
+ * Information recorded about one BSS. Entries can be linked on a list
+ * (list) and chained through hnext; struct ieee80211_local holds the
+ * matching sta_bss_list and sta_bss_hash[].
+ * NOTE(review): users looks like a reference count and the *_ie pointers
+ * look like separately allocated copies of information elements - confirm
+ * in ieee80211_sta.c.
+ */
+struct ieee80211_sta_bss {
+ struct list_head list;
+ struct ieee80211_sta_bss *hnext;
+ atomic_t users;
+
+ u8 bssid[ETH_ALEN];
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ size_t ssid_len;
+ u16 capability; /* host byte order */
+ int hw_mode;
+ int channel;
+ int freq;
+ int rssi, signal, noise;
+ u8 *wpa_ie;
+ size_t wpa_ie_len;
+ u8 *rsn_ie;
+ size_t rsn_ie_len;
+ u8 *wmm_ie;
+ size_t wmm_ie_len;
+#define IEEE80211_MAX_SUPP_RATES 32
+ u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
+ size_t supp_rates_len;
+ int beacon_int;
+ u64 timestamp;
+
+ int probe_resp;
+ unsigned long last_update;
+
+};
+
+
+/* Result returned by a TX or RX handler (see ieee80211_tx_handler and
+ * ieee80211_rx_handler). */
+typedef enum {
+ TXRX_CONTINUE, TXRX_DROP, TXRX_QUEUED
+} ieee80211_txrx_result;
+
+/*
+ * Context handed to the TX and RX handler functions. The leading members
+ * are common to both directions; the union u holds the TX-only (u.tx) or
+ * RX-only (u.rx) state.
+ */
+struct ieee80211_txrx_data {
+ struct sk_buff *skb;
+ struct net_device *dev;
+ struct ieee80211_local *local;
+ struct ieee80211_sub_if_data *sdata;
+ struct sta_info *sta;
+ u16 fc, ethertype;
+ struct ieee80211_key *key;
+ unsigned int fragmented:1; /* whether the MSDU was fragmented */
+ union {
+ struct {
+ struct ieee80211_tx_control *control;
+ unsigned int unicast:1;
+ unsigned int ps_buffered:1;
+ unsigned int short_preamble:1;
+ unsigned int probe_last_frag:1;
+ struct ieee80211_hw_mode *mode;
+ struct ieee80211_rate *rate;
+ /* use this rate (if set) for last fragment; rate can
+ * be set to lower rate for the first fragments, e.g.,
+ * when using CTS protection with IEEE 802.11g. */
+ struct ieee80211_rate *last_frag_rate;
+ int last_frag_hwrate;
+ int mgmt_interface;
+
+ /* Extra fragments (in addition to the first fragment
+ * in skb) */
+ int num_extra_frag;
+ struct sk_buff **extra_frag;
+ } tx;
+ struct {
+ struct ieee80211_rx_status *status;
+ int sent_ps_buffered;
+ int queue;
+ int load;
+ unsigned int in_scan:1;
+ /* frame is destined to interface currently processed
+ * (including multicast frames) */
+ unsigned int ra_match:1;
+ } rx;
+ } u;
+};
+
+/*
+ * Per-packet TX bookkeeping. Stored in sk_buff->cb, so it has to fit into
+ * that control buffer.
+ */
+struct ieee80211_tx_packet_data {
+ int ifindex;
+ unsigned long jiffies;
+ unsigned int req_tx_status:1;
+ unsigned int do_not_encrypt:1;
+ unsigned int requeue:1;
+ unsigned int mgmt_iface:1;
+ unsigned int queue:4;
+};
+
+/*
+ * A TX packet kept for later transmission: the TX control data, the first
+ * fragment (skb), any extra fragments and the last-fragment rate settings.
+ * struct ieee80211_local keeps one of these per TX data queue in
+ * pending_packet[].
+ */
+struct ieee80211_tx_stored_packet {
+ struct ieee80211_tx_control control;
+ struct sk_buff *skb;
+ int num_extra_frag;
+ struct sk_buff **extra_frag;
+ int last_frag_rateidx;
+ int last_frag_hwrate;
+ struct ieee80211_rate *last_frag_rate;
+ unsigned int last_frag_rate_ctrl_probe:1;
+};
+
+/*
+ * TX and RX handler function types. Both take the shared
+ * struct ieee80211_txrx_data context and return an ieee80211_txrx_result.
+ */
+typedef ieee80211_txrx_result (*ieee80211_tx_handler)
+(struct ieee80211_txrx_data *tx);
+
+typedef ieee80211_txrx_result (*ieee80211_rx_handler)
+(struct ieee80211_txrx_data *rx);
+
+/*
+ * AP-type interface state (the ap member of the union in
+ * struct ieee80211_sub_if_data, also reachable through sdata->bss):
+ * beacon template, SSID, TIM bitmap and power-save buffering state.
+ */
+struct ieee80211_if_ap {
+ u8 *beacon_head, *beacon_tail;
+ int beacon_head_len, beacon_tail_len;
+
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ size_t ssid_len;
+ u8 *generic_elem;
+ size_t generic_elem_len;
+
+ /* yes, this looks ugly, but guarantees that we can later use
+ * bitmap_empty :)
+ * NB: don't ever use set_bit, use bss_tim_set/bss_tim_clear! */
+ u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)];
+ atomic_t num_sta_ps; /* number of stations in PS mode */
+ struct sk_buff_head ps_bc_buf;
+ int dtim_period, dtim_count;
+ int force_unicast_rateidx; /* forced TX rateidx for unicast frames */
+ int max_ratectrl_rateidx; /* max TX rateidx for rate control */
+ int num_beacons; /* number of TXed beacon frames for this BSS */
+};
+
+/* WDS-type interface state: the remote peer's address and a pointer to a
+ * station entry. */
+struct ieee80211_if_wds {
+ u8 remote_addr[ETH_ALEN];
+ struct sta_info *sta;
+};
+
+/* VLAN-type interface state: only an 8-bit id. */
+struct ieee80211_if_vlan {
+ u8 id;
+};
+
+/*
+ * STA/IBSS-type interface state (the sta member of the union in
+ * struct ieee80211_sub_if_data): connection state, timer and work item,
+ * BSSID/SSID selection, stored association IEs and authentication
+ * settings.
+ */
+struct ieee80211_if_sta {
+ enum {
+ IEEE80211_DISABLED, IEEE80211_AUTHENTICATE,
+ IEEE80211_ASSOCIATE, IEEE80211_ASSOCIATED,
+ IEEE80211_IBSS_SEARCH, IEEE80211_IBSS_JOINED
+ } state;
+ struct timer_list timer;
+ struct work_struct work;
+ u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ size_t ssid_len;
+ u16 aid;
+ u16 ap_capab, capab;
+ u8 *extra_ie; /* to be added to the end of AssocReq */
+ size_t extra_ie_len;
+
+ /* The last AssocReq/Resp IEs */
+ u8 *assocreq_ies, *assocresp_ies;
+ size_t assocreq_ies_len, assocresp_ies_len;
+
+ int auth_tries, assoc_tries;
+
+ unsigned int ssid_set:1;
+ unsigned int bssid_set:1;
+ unsigned int prev_bssid_set:1;
+ unsigned int authenticated:1;
+ unsigned int associated:1;
+ unsigned int probereq_poll:1;
+ unsigned int use_protection:1;
+ unsigned int create_ibss:1;
+ unsigned int mixed_cell:1;
+ unsigned int wmm_enabled:1;
+ unsigned int auto_ssid_sel:1;
+ unsigned int auto_bssid_sel:1;
+ unsigned int auto_channel_sel:1;
+ /* NOTE(review): the IEEE80211_STA_REQ_* values look like bit numbers
+ * used with the request word below - confirm in ieee80211_sta.c. */
+#define IEEE80211_STA_REQ_SCAN 0
+#define IEEE80211_STA_REQ_AUTH 1
+#define IEEE80211_STA_REQ_RUN 2
+ unsigned long request;
+ struct sk_buff_head skb_queue;
+
+ int key_mgmt;
+ unsigned long last_probe;
+
+#define IEEE80211_AUTH_ALG_OPEN BIT(0)
+#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1)
+#define IEEE80211_AUTH_ALG_LEAP BIT(2)
+ unsigned int auth_algs; /* bitfield of allowed auth algs */
+ int auth_alg; /* currently used IEEE 802.11 authentication algorithm */
+ int auth_transaction;
+
+ unsigned long ibss_join_req;
+ struct sk_buff *probe_resp; /* ProbeResp template for IBSS */
+ u32 supp_rates_bits;
+
+ int wmm_last_param_set;
+};
+
+
+/*
+ * Private data of one virtual interface. It is the netdev_priv() area of
+ * the interface's net_device (see IEEE80211_DEV_TO_SUB_IF) and is linked
+ * into ieee80211_local.sub_if_list through list. The union u holds the
+ * state specific to the interface type; with CONFIG_MAC80211_DEBUGFS the
+ * debugfs union holds the matching per-type dentries.
+ */
+struct ieee80211_sub_if_data {
+ struct list_head list;
+ unsigned int type;
+
+ struct wireless_dev wdev;
+
+ struct net_device *dev;
+ struct ieee80211_local *local;
+
+ int mc_count;
+ unsigned int allmulti:1;
+ unsigned int promisc:1;
+
+ struct net_device_stats stats;
+ int drop_unencrypted;
+ int eapol; /* 0 = process EAPOL frames as normal data frames,
+ * 1 = send EAPOL frames through wlan#ap to hostapd
+ * (default) */
+ int ieee802_1x; /* IEEE 802.1X PAE - drop packet to/from unauthorized
+ * port */
+
+ u16 sequence;
+
+ /* Fragment table for host-based reassembly */
+ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
+ unsigned int fragment_next;
+
+#define NUM_DEFAULT_KEYS 4
+ struct ieee80211_key *keys[NUM_DEFAULT_KEYS];
+ struct ieee80211_key *default_key;
+
+ struct ieee80211_if_ap *bss; /* BSS that this device belongs to */
+
+ /* Type-specific state; which member is valid depends on type. */
+ union {
+ struct ieee80211_if_ap ap;
+ struct ieee80211_if_wds wds;
+ struct ieee80211_if_vlan vlan;
+ struct ieee80211_if_sta sta;
+ } u;
+ int channel_use;
+ int channel_use_raw;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ struct dentry *debugfsdir;
+ union {
+ struct {
+ struct dentry *channel_use;
+ struct dentry *drop_unencrypted;
+ struct dentry *eapol;
+ struct dentry *ieee8021_x;
+ struct dentry *state;
+ struct dentry *bssid;
+ struct dentry *prev_bssid;
+ struct dentry *ssid_len;
+ struct dentry *aid;
+ struct dentry *ap_capab;
+ struct dentry *capab;
+ struct dentry *extra_ie_len;
+ struct dentry *auth_tries;
+ struct dentry *assoc_tries;
+ struct dentry *auth_algs;
+ struct dentry *auth_alg;
+ struct dentry *auth_transaction;
+ struct dentry *flags;
+ } sta;
+ struct {
+ struct dentry *channel_use;
+ struct dentry *drop_unencrypted;
+ struct dentry *eapol;
+ struct dentry *ieee8021_x;
+ struct dentry *num_sta_ps;
+ struct dentry *dtim_period;
+ struct dentry *dtim_count;
+ struct dentry *num_beacons;
+ struct dentry *force_unicast_rateidx;
+ struct dentry *max_ratectrl_rateidx;
+ struct dentry *num_buffered_multicast;
+ struct dentry *beacon_head_len;
+ struct dentry *beacon_tail_len;
+ } ap;
+ struct {
+ struct dentry *channel_use;
+ struct dentry *drop_unencrypted;
+ struct dentry *eapol;
+ struct dentry *ieee8021_x;
+ struct dentry *peer;
+ } wds;
+ struct {
+ struct dentry *channel_use;
+ struct dentry *drop_unencrypted;
+ struct dentry *eapol;
+ struct dentry *ieee8021_x;
+ struct dentry *vlan_id;
+ } vlan;
+ struct {
+ struct dentry *mode;
+ } monitor;
+ struct dentry *default_key;
+ } debugfs;
+#endif
+};
+
+#define IEEE80211_DEV_TO_SUB_IF(dev) netdev_priv(dev)
+
+/*
+ * Internal message kinds.
+ * NOTE(review): presumably used to tag skbs queued for the IRQ-safe
+ * tasklet in struct ieee80211_local - confirm in ieee80211.c.
+ */
+enum {
+ IEEE80211_RX_MSG = 1,
+ IEEE80211_TX_STATUS_MSG = 2,
+};
+
+/*
+ * Stack-internal state for one piece of wireless hardware. The
+ * driver-visible struct ieee80211_hw is embedded as the first member; use
+ * hw_to_local() / local_to_hw() to convert between the two. Besides
+ * configuration values it holds the station tables, the list of virtual
+ * interfaces, scan state, SNMP-style counters and (optionally) LED,
+ * debug-counter and debugfs bookkeeping.
+ */
+struct ieee80211_local {
+ /* embed the driver visible part.
+ * don't cast (use the static inlines below), but we keep
+ * it first anyway so they become a no-op */
+ struct ieee80211_hw hw;
+
+ const struct ieee80211_ops *ops;
+
+ /* List of registered struct ieee80211_hw_mode */
+ struct list_head modes_list;
+
+ struct net_device *mdev; /* wmaster# - "master" 802.11 device */
+ struct net_device *apdev; /* wlan#ap - management frames (hostapd) */
+ int open_count;
+ int monitors;
+ struct iw_statistics wstats;
+ u8 wstats_flags;
+
+ enum {
+ IEEE80211_DEV_UNINITIALIZED = 0,
+ IEEE80211_DEV_REGISTERED,
+ IEEE80211_DEV_UNREGISTERED,
+ } reg_state;
+
+ /* Tasklet and skb queue to process calls from IRQ mode. All frames
+ * added to skb_queue will be processed, but frames in
+ * skb_queue_unreliable may be dropped if the total length of these
+ * queues increases over the limit. */
+#define IEEE80211_IRQSAFE_QUEUE_LIMIT 128
+ struct tasklet_struct tasklet;
+ struct sk_buff_head skb_queue;
+ struct sk_buff_head skb_queue_unreliable;
+
+ /* Station data structures */
+ spinlock_t sta_lock; /* mutex for STA data structures */
+ int num_sta; /* number of stations in sta_list */
+ struct list_head sta_list;
+ struct list_head deleted_sta_list;
+ struct sta_info *sta_hash[STA_HASH_SIZE];
+ struct timer_list sta_cleanup;
+
+ unsigned long state[NUM_TX_DATA_QUEUES];
+ struct ieee80211_tx_stored_packet pending_packet[NUM_TX_DATA_QUEUES];
+ struct tasklet_struct tx_pending_tasklet;
+
+ int mc_count; /* total count of multicast entries in all interfaces */
+ int iff_allmultis, iff_promiscs;
+ /* number of interfaces with corresponding IFF_ flags */
+
+ struct rate_control_ref *rate_ctrl;
+
+ int next_mode; /* MODE_IEEE80211*
+ * The mode preference for next channel change. This is
+ * used to select .11g vs. .11b channels (or 4.9 GHz vs.
+ * .11a) when the channel number is not unique. */
+
+ /* Supported and basic rate filters for different modes. These are
+ * pointers to -1 terminated lists and rates in 100 kbps units. */
+ int *supp_rates[NUM_IEEE80211_MODES];
+ int *basic_rates[NUM_IEEE80211_MODES];
+
+ int rts_threshold;
+ int cts_protect_erp_frames;
+ int fragmentation_threshold;
+ int short_retry_limit; /* dot11ShortRetryLimit */
+ int long_retry_limit; /* dot11LongRetryLimit */
+ int short_preamble; /* use short preamble with IEEE 802.11b */
+
+ struct crypto_blkcipher *wep_tx_tfm;
+ struct crypto_blkcipher *wep_rx_tfm;
+ u32 wep_iv;
+ int key_tx_rx_threshold; /* number of times any key can be used in TX
+ * or RX before generating a rekey
+ * notification; 0 = notification disabled. */
+
+ int bridge_packets; /* bridge packets between associated stations and
+ * deliver multicast frames both back to wireless
+ * media and to the local net stack */
+
+ ieee80211_rx_handler *rx_pre_handlers;
+ ieee80211_rx_handler *rx_handlers;
+ ieee80211_tx_handler *tx_handlers;
+
+ rwlock_t sub_if_lock; /* Protects sub_if_list. Cannot be taken under
+ * sta_bss_lock or sta_lock. */
+ struct list_head sub_if_list;
+ int sta_scanning;
+ int scan_channel_idx;
+ enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state;
+ unsigned long last_scan_completed;
+ struct delayed_work scan_work;
+ struct net_device *scan_dev;
+ struct ieee80211_channel *oper_channel, *scan_channel;
+ struct ieee80211_hw_mode *oper_hw_mode, *scan_hw_mode;
+ u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
+ size_t scan_ssid_len;
+ struct list_head sta_bss_list;
+ struct ieee80211_sta_bss *sta_bss_hash[STA_HASH_SIZE];
+ spinlock_t sta_bss_lock;
+#define IEEE80211_SCAN_MATCH_SSID BIT(0)
+#define IEEE80211_SCAN_WPA_ONLY BIT(1)
+#define IEEE80211_SCAN_EXTRA_INFO BIT(2)
+ int scan_flags;
+
+ /* SNMP counters */
+ /* dot11CountersTable */
+ u32 dot11TransmittedFragmentCount;
+ u32 dot11MulticastTransmittedFrameCount;
+ u32 dot11FailedCount;
+ u32 dot11RetryCount;
+ u32 dot11MultipleRetryCount;
+ u32 dot11FrameDuplicateCount;
+ u32 dot11ReceivedFragmentCount;
+ u32 dot11MulticastReceivedFrameCount;
+ u32 dot11TransmittedFrameCount;
+ u32 dot11WEPUndecryptableCount;
+
+#ifdef CONFIG_MAC80211_LEDS
+ int tx_led_counter, rx_led_counter;
+ struct led_trigger *tx_led, *rx_led;
+ char tx_led_name[32], rx_led_name[32];
+#endif
+
+ u32 channel_use;
+ u32 channel_use_raw;
+ u32 stat_time;
+ struct timer_list stat_timer;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ struct work_struct sta_debugfs_add;
+#endif
+
+ enum {
+ STA_ANTENNA_SEL_AUTO = 0,
+ STA_ANTENNA_SEL_SW_CTRL = 1,
+ STA_ANTENNA_SEL_SW_CTRL_DEBUG = 2
+ } sta_antenna_sel;
+
+ int rate_ctrl_num_up, rate_ctrl_num_down;
+
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+ /* TX/RX handler statistics */
+ unsigned int tx_handlers_drop;
+ unsigned int tx_handlers_queued;
+ unsigned int tx_handlers_drop_unencrypted;
+ unsigned int tx_handlers_drop_fragment;
+ unsigned int tx_handlers_drop_wep;
+ unsigned int tx_handlers_drop_not_assoc;
+ unsigned int tx_handlers_drop_unauth_port;
+ unsigned int rx_handlers_drop;
+ unsigned int rx_handlers_queued;
+ unsigned int rx_handlers_drop_nullfunc;
+ unsigned int rx_handlers_drop_defrag;
+ unsigned int rx_handlers_drop_short;
+ unsigned int rx_handlers_drop_passive_scan;
+ unsigned int tx_expand_skb_head;
+ unsigned int tx_expand_skb_head_cloned;
+ unsigned int rx_expand_skb_head;
+ unsigned int rx_expand_skb_head2;
+ unsigned int rx_handlers_fragments;
+ unsigned int tx_status_drop;
+ unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES];
+ /* NOTE(review): the TX array is sized with NUM_RX_DATA_QUEUES, not
+ * NUM_TX_DATA_QUEUES - confirm this is intended. */
+ unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES];
+ /* I802_DEBUG_INC() increments a counter only when the debug counters
+ * are compiled in; otherwise it expands to an empty statement. */
+#define I802_DEBUG_INC(c) (c)++
+#else /* CONFIG_MAC80211_DEBUG_COUNTERS */
+#define I802_DEBUG_INC(c) do { } while (0)
+#endif /* CONFIG_MAC80211_DEBUG_COUNTERS */
+
+
+ int default_wep_only; /* only default WEP keys are used with this
+ * interface; this is used to decide when hwaccel
+ * can be used with default keys */
+ int total_ps_buffered; /* total number of all buffered unicast and
+ * multicast packets for power saving stations
+ */
+ int allow_broadcast_always; /* whether to allow TX of broadcast frames
+ * even when there are no associated STAs
+ */
+
+ int wifi_wme_noack_test;
+ unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */
+
+ unsigned int enabled_modes; /* bitfield of allowed modes;
+ * (1 << MODE_*) */
+ unsigned int hw_modes; /* bitfield of supported hardware modes;
+ * (1 << MODE_*) */
+
+ int user_space_mlme;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ struct local_debugfsdentries {
+ struct dentry *channel;
+ struct dentry *frequency;
+ struct dentry *radar_detect;
+ struct dentry *antenna_sel_tx;
+ struct dentry *antenna_sel_rx;
+ struct dentry *bridge_packets;
+ struct dentry *key_tx_rx_threshold;
+ struct dentry *rts_threshold;
+ struct dentry *fragmentation_threshold;
+ struct dentry *short_retry_limit;
+ struct dentry *long_retry_limit;
+ struct dentry *total_ps_buffered;
+ struct dentry *mode;
+ struct dentry *wep_iv;
+ struct dentry *tx_power_reduction;
+ struct dentry *modes;
+ struct dentry *statistics;
+ struct local_debugfsdentries_statsdentries {
+ struct dentry *transmitted_fragment_count;
+ struct dentry *multicast_transmitted_frame_count;
+ struct dentry *failed_count;
+ struct dentry *retry_count;
+ struct dentry *multiple_retry_count;
+ struct dentry *frame_duplicate_count;
+ struct dentry *received_fragment_count;
+ struct dentry *multicast_received_frame_count;
+ struct dentry *transmitted_frame_count;
+ struct dentry *wep_undecryptable_count;
+ struct dentry *num_scans;
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+ struct dentry *tx_handlers_drop;
+ struct dentry *tx_handlers_queued;
+ struct dentry *tx_handlers_drop_unencrypted;
+ struct dentry *tx_handlers_drop_fragment;
+ struct dentry *tx_handlers_drop_wep;
+ struct dentry *tx_handlers_drop_not_assoc;
+ struct dentry *tx_handlers_drop_unauth_port;
+ struct dentry *rx_handlers_drop;
+ struct dentry *rx_handlers_queued;
+ struct dentry *rx_handlers_drop_nullfunc;
+ struct dentry *rx_handlers_drop_defrag;
+ struct dentry *rx_handlers_drop_short;
+ struct dentry *rx_handlers_drop_passive_scan;
+ struct dentry *tx_expand_skb_head;
+ struct dentry *tx_expand_skb_head_cloned;
+ struct dentry *rx_expand_skb_head;
+ struct dentry *rx_expand_skb_head2;
+ struct dentry *rx_handlers_fragments;
+ struct dentry *tx_status_drop;
+ struct dentry *wme_tx_queue;
+ struct dentry *wme_rx_queue;
+#endif
+ struct dentry *dot11ACKFailureCount;
+ struct dentry *dot11RTSFailureCount;
+ struct dentry *dot11FCSErrorCount;
+ struct dentry *dot11RTSSuccessCount;
+ } stats;
+ struct dentry *stations;
+ struct dentry *keys;
+ } debugfs;
+#endif
+};
+
+/*
+ * Convert the driver-visible struct ieee80211_hw back into the
+ * struct ieee80211_local that embeds it.
+ */
+static inline struct ieee80211_local *hw_to_local(
+	struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local;
+
+	local = container_of(hw, struct ieee80211_local, hw);
+	return local;
+}
+
+/* Return the driver-visible struct ieee80211_hw embedded in @local. */
+static inline struct ieee80211_hw *local_to_hw(
+	struct ieee80211_local *local)
+{
+	struct ieee80211_hw *hw = &local->hw;
+
+	return hw;
+}
+
+/*
+ * Link state values.
+ * NOTE(review): presumably bit numbers used with the per-queue state[]
+ * words in struct ieee80211_local - confirm against the TX path.
+ */
+enum ieee80211_link_state_t {
+ IEEE80211_LINK_STATE_XOFF = 0,
+ IEEE80211_LINK_STATE_PENDING,
+};
+
+/*
+ * An attribute of a station entry: a generic struct attribute plus
+ * show/store callbacks that operate on a struct sta_info.
+ */
+struct sta_attribute {
+ struct attribute attr;
+ ssize_t (*show)(const struct sta_info *, char *buf);
+ ssize_t (*store)(struct sta_info *, const char *buf, size_t count);
+};
+
+/* Set the TIM bit for @aid in @bss; performs no locking of its own. */
+static inline void __bss_tim_set(struct ieee80211_if_ap *bss, int aid)
+{
+	/*
+	 * The layout (bit aid % 8 of byte aid / 8) has been mandated by the
+	 * IEEE specifications, so this must not be converted to the
+	 * __set_bit() format.
+	 */
+	const int byte = aid / 8;
+	const int bit = aid % 8;
+
+	bss->tim[byte] |= 1 << bit;
+}
+
+/*
+ * Set the TIM bit for @aid in @bss while holding local->sta_lock with
+ * bottom halves disabled.
+ */
+static inline void bss_tim_set(struct ieee80211_local *local,
+ struct ieee80211_if_ap *bss, int aid)
+{
+ spin_lock_bh(&local->sta_lock);
+ __bss_tim_set(bss, aid);
+ spin_unlock_bh(&local->sta_lock);
+}
+
+/* Clear the TIM bit for @aid in @bss; performs no locking of its own. */
+static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, int aid)
+{
+	/*
+	 * This format has been mandated by the IEEE specifications,
+	 * so this line may not be changed to use the __clear_bit() format.
+	 *
+	 * The mask has to be inverted with bitwise NOT (~). Logical NOT (!)
+	 * of the non-zero mask is 0, which would wipe the whole byte and
+	 * thereby drop the TIM bits of up to seven other AIDs.
+	 */
+	bss->tim[(aid)/8] &= ~(1<<((aid) % 8));
+}
+
+/*
+ * Clear the TIM bit for @aid in @bss while holding local->sta_lock with
+ * bottom halves disabled.
+ */
+static inline void bss_tim_clear(struct ieee80211_local *local,
+ struct ieee80211_if_ap *bss, int aid)
+{
+ spin_lock_bh(&local->sta_lock);
+ __bss_tim_clear(bss, aid);
+ spin_unlock_bh(&local->sta_lock);
+}
+
+/**
+ * ieee80211_is_erp_rate - Check if a rate is an ERP rate
+ * @phymode: PHY mode the rate belongs to (MODE_IEEE80211...)
+ * @rate: Transmission rate to check, in units of 100 kbps
+ *
+ * Only MODE_IEEE80211G has Extended Rate PHY (ERP) rates: in that mode
+ * every rate other than 10, 20, 55 and 110 (1, 2, 5.5 and 11 Mbps) is
+ * treated as an ERP rate.
+ *
+ * Returns 1 for an ERP rate and 0 otherwise.
+ */
+static inline int ieee80211_is_erp_rate(int phymode, int rate)
+{
+	if (phymode != MODE_IEEE80211G)
+		return 0;
+
+	switch (rate) {
+	case 10:
+	case 20:
+	case 55:
+	case 110:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+/* ieee80211.c */
+int ieee80211_hw_config(struct ieee80211_local *local);
+int ieee80211_if_config(struct net_device *dev);
+int ieee80211_if_config_beacon(struct net_device *dev);
+struct ieee80211_key_conf *
+ieee80211_key_data2conf(struct ieee80211_local *local,
+ const struct ieee80211_key *data);
+struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata,
+ int idx, size_t key_len, gfp_t flags);
+void ieee80211_key_free(struct ieee80211_key *key);
+void ieee80211_rx_mgmt(struct ieee80211_local *local, struct sk_buff *skb,
+ struct ieee80211_rx_status *status, u32 msg_type);
+void ieee80211_prepare_rates(struct ieee80211_local *local,
+ struct ieee80211_hw_mode *mode);
+void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx);
+int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr);
+void ieee80211_if_setup(struct net_device *dev);
+void ieee80211_if_mgmt_setup(struct net_device *dev);
+int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
+ const char *name);
+struct net_device_stats *ieee80211_dev_stats(struct net_device *dev);
+
+/* ieee80211_ioctl.c */
+extern const struct iw_handler_def ieee80211_iw_handler_def;
+
+void ieee80211_update_default_wep_only(struct ieee80211_local *local);
+
+
+/* Least common multiple of the used rates (in 100 kbps). This is used to
+ * calculate rate_inv values for each rate so that only integers are needed. */
+#define CHAN_UTIL_RATE_LCM 95040
+/* 1 usec is 1/8 * (95040/10) = 1188 */
+#define CHAN_UTIL_PER_USEC 1188
+/* Amount of bits to shift the result right to scale the total utilization
+ * to values that will not wrap around 32-bit integers. */
+#define CHAN_UTIL_SHIFT 9
+/* Theoretical maximum of channel utilization counter in 10 ms (stat_time=1):
+ * (CHAN_UTIL_PER_USEC * 10000) >> CHAN_UTIL_SHIFT = 23203. So dividing the
+ * raw value with about 23 should give utilization in 10th of a percentage
+ * (1/1000). However, utilization is only estimated and not all intervals
+ * between frames etc. are calculated. 18 seems to give numbers that are closer
+ * to the real maximum. */
+#define CHAN_UTIL_PER_10MS 18
+#define CHAN_UTIL_HDR_LONG (202 * CHAN_UTIL_PER_USEC)
+#define CHAN_UTIL_HDR_SHORT (40 * CHAN_UTIL_PER_USEC)
+
+
+/* ieee80211_ioctl.c */
+int ieee80211_set_compression(struct ieee80211_local *local,
+ struct net_device *dev, struct sta_info *sta);
+int ieee80211_init_client(struct net_device *dev);
+int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq);
+/* ieee80211_sta.c */
+void ieee80211_sta_timer(unsigned long data);
+void ieee80211_sta_work(struct work_struct *work);
+void ieee80211_sta_scan_work(struct work_struct *work);
+void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
+ struct ieee80211_rx_status *rx_status);
+int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len);
+int ieee80211_sta_get_ssid(struct net_device *dev, char *ssid, size_t *len);
+int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid);
+int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len);
+void ieee80211_sta_req_auth(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta);
+int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len);
+void ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
+ struct ieee80211_rx_status *rx_status);
+void ieee80211_rx_bss_list_init(struct net_device *dev);
+void ieee80211_rx_bss_list_deinit(struct net_device *dev);
+int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len);
+struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev,
+ struct sk_buff *skb, u8 *bssid,
+ u8 *addr);
+int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason);
+int ieee80211_sta_disassociate(struct net_device *dev, u16 reason);
+
+/* ieee80211_iface.c */
+int ieee80211_if_add(struct net_device *dev, const char *name,
+ struct net_device **new_dev, int type);
+void ieee80211_if_set_type(struct net_device *dev, int type);
+void ieee80211_if_reinit(struct net_device *dev);
+void __ieee80211_if_del(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
+int ieee80211_if_remove(struct net_device *dev, const char *name, int id);
+void ieee80211_if_free(struct net_device *dev);
+void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata);
+int ieee80211_if_add_mgmt(struct ieee80211_local *local);
+void ieee80211_if_del_mgmt(struct ieee80211_local *local);
+
+/* for wiphy privid */
+extern void *mac80211_wiphy_privid;
+
+#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c
new file mode 100644
index 000000000000..cf0f32e8c2a2
--- /dev/null
+++ b/net/mac80211/ieee80211_iface.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/if_arp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "sta_info.h"
+#include "debugfs_netdev.h"
+
+/*
+ * Give a sub-interface its default parameters (unencrypted frames are not
+ * dropped, EAPOL handling enabled) and initialize the skb list of every
+ * fragment table entry.
+ */
+void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_fragment_entry *entry = sdata->fragments;
+	struct ieee80211_fragment_entry *end = entry + IEEE80211_FRAGMENT_MAX;
+
+	sdata->drop_unencrypted = 0;
+	sdata->eapol = 1;
+
+	for (; entry != end; entry++)
+		skb_queue_head_init(&entry->skb_list);
+}
+
+/* Free all skbs still held in the sub-interface's fragment table. */
+static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata)
+{
+	int idx = 0;
+
+	while (idx < IEEE80211_FRAGMENT_MAX)
+		__skb_queue_purge(&sdata->fragments[idx++].skb_list);
+}
+
+/*
+ * Allocate, register and link a new virtual interface called @name on the
+ * hardware that @dev belongs to, then switch it to @type. On success the
+ * new net_device is stored in *@new_dev when @new_dev is not NULL.
+ *
+ * Returns 0 on success, -ENOMEM if the net_device cannot be allocated,
+ * -ENODEV if the hardware has been unregistered in the meantime, or the
+ * error from dev_alloc_name() / register_netdevice().
+ *
+ * Must be called with rtnl lock held.
+ */
+int ieee80211_if_add(struct net_device *dev, const char *name,
+		     struct net_device **new_dev, int type)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata;
+	struct net_device *ndev;
+	int err;
+
+	ASSERT_RTNL();
+	ndev = alloc_netdev(sizeof(*sdata), name, ieee80211_if_setup);
+	if (ndev == NULL)
+		return -ENOMEM;
+
+	err = dev_alloc_name(ndev, ndev->name);
+	if (err < 0)
+		goto fail;
+
+	/* Inherit address and resource information from the hardware and
+	 * from the device we were called on. */
+	memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
+	ndev->base_addr = dev->base_addr;
+	ndev->irq = dev->irq;
+	ndev->mem_start = dev->mem_start;
+	ndev->mem_end = dev->mem_end;
+	SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
+
+	/* The interface starts out as an AP; the requested type is applied
+	 * once the device has been registered. */
+	sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
+	ndev->ieee80211_ptr = &sdata->wdev;
+	sdata->wdev.wiphy = local->hw.wiphy;
+	sdata->type = IEEE80211_IF_TYPE_AP;
+	sdata->dev = ndev;
+	sdata->local = local;
+	ieee80211_if_sdata_init(sdata);
+
+	err = register_netdevice(ndev);
+	if (err)
+		goto fail;
+
+	ieee80211_debugfs_add_netdev(sdata);
+	ieee80211_if_set_type(ndev, type);
+
+	write_lock_bh(&local->sub_if_lock);
+	if (likely(local->reg_state != IEEE80211_DEV_UNREGISTERED)) {
+		list_add(&sdata->list, &local->sub_if_list);
+		if (new_dev != NULL)
+			*new_dev = ndev;
+		write_unlock_bh(&local->sub_if_lock);
+
+		ieee80211_update_default_wep_only(local);
+
+		return 0;
+	}
+
+	/* The hardware went away while we were setting up: undo the
+	 * registration instead of linking the interface. */
+	write_unlock_bh(&local->sub_if_lock);
+	__ieee80211_if_del(local, sdata);
+	return -ENODEV;
+
+fail:
+	free_netdev(ndev);
+	return err;
+}
+
+/*
+ * Create and register the management interface ("wmgmt%d") for @local and
+ * remember it in local->apdev. The interface is opened right away when
+ * local->open_count is already non-zero.
+ *
+ * Returns 0 on success, -ENOMEM if the net_device cannot be allocated, or
+ * the error from dev_alloc_name() / register_netdevice().
+ * The rtnl lock must be held (checked with ASSERT_RTNL()).
+ */
+int ieee80211_if_add_mgmt(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct net_device *mgmt_dev;
+	int err;
+
+	ASSERT_RTNL();
+
+	mgmt_dev = alloc_netdev(sizeof(*sdata), "wmgmt%d",
+				ieee80211_if_mgmt_setup);
+	if (mgmt_dev == NULL)
+		return -ENOMEM;
+
+	err = dev_alloc_name(mgmt_dev, mgmt_dev->name);
+	if (err < 0)
+		goto fail;
+
+	memcpy(mgmt_dev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
+	SET_NETDEV_DEV(mgmt_dev, wiphy_dev(local->hw.wiphy));
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(mgmt_dev);
+	mgmt_dev->ieee80211_ptr = &sdata->wdev;
+	sdata->wdev.wiphy = local->hw.wiphy;
+	sdata->type = IEEE80211_IF_TYPE_MGMT;
+	sdata->dev = mgmt_dev;
+	sdata->local = local;
+	ieee80211_if_sdata_init(sdata);
+
+	err = register_netdevice(mgmt_dev);
+	if (err)
+		goto fail;
+
+	ieee80211_debugfs_add_netdev(sdata);
+
+	if (local->open_count > 0)
+		dev_open(mgmt_dev);
+	local->apdev = mgmt_dev;
+	return 0;
+
+fail:
+	free_netdev(mgmt_dev);
+	return err;
+}
+
+/*
+ * Remove the management interface of @local: drop its debugfs entries,
+ * clear local->apdev and unregister the net_device.
+ * The rtnl lock must be held (checked with ASSERT_RTNL()).
+ */
+void ieee80211_if_del_mgmt(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *mgmt_sdata;
+	struct net_device *mgmt_dev;
+
+	ASSERT_RTNL();
+	mgmt_dev = local->apdev;
+	mgmt_sdata = IEEE80211_DEV_TO_SUB_IF(mgmt_dev);
+	ieee80211_debugfs_remove_netdev(mgmt_sdata);
+	/* apdev has to be NULL before the device can be freed, see the
+	 * BUG_ON() in ieee80211_if_free(). */
+	local->apdev = NULL;
+	unregister_netdevice(mgmt_dev);
+}
+
+/*
+ * Switch the virtual interface @dev to @type and initialize the
+ * type-specific state in sdata->u / sdata->bss:
+ *  - WDS:      no BSS
+ *  - VLAN:     nothing to set up here
+ *  - AP:       default DTIM period and rate indices, own BSS
+ *  - STA/IBSS: work item, timer, frame queue and default capabilities;
+ *              the BSS is the AP data of the master device
+ *  - MNTR:     the net_device type becomes ARPHRD_IEEE80211_RADIOTAP
+ * An unknown type only produces a warning. Afterwards the debugfs entries
+ * are switched over from the old type and the default-WEP-only state of
+ * the hardware is recomputed.
+ */
+void ieee80211_if_set_type(struct net_device *dev, int type)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	int oldtype = sdata->type;
+
+	sdata->type = type;
+	switch (type) {
+	case IEEE80211_IF_TYPE_WDS:
+		sdata->bss = NULL;
+		break;
+	case IEEE80211_IF_TYPE_VLAN:
+		break;
+	case IEEE80211_IF_TYPE_AP:
+		sdata->u.ap.dtim_period = 2;
+		sdata->u.ap.force_unicast_rateidx = -1;
+		sdata->u.ap.max_ratectrl_rateidx = -1;
+		skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
+		sdata->bss = &sdata->u.ap;
+		break;
+	case IEEE80211_IF_TYPE_STA:
+	case IEEE80211_IF_TYPE_IBSS: {
+		struct ieee80211_sub_if_data *msdata;
+		struct ieee80211_if_sta *ifsta;
+
+		ifsta = &sdata->u.sta;
+		INIT_WORK(&ifsta->work, ieee80211_sta_work);
+		setup_timer(&ifsta->timer, ieee80211_sta_timer,
+			    (unsigned long) sdata);
+		skb_queue_head_init(&ifsta->skb_queue);
+
+		ifsta->capab = WLAN_CAPABILITY_ESS;
+		ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN |
+			IEEE80211_AUTH_ALG_SHARED_KEY;
+		ifsta->create_ibss = 1;
+		ifsta->wmm_enabled = 1;
+		ifsta->auto_channel_sel = 1;
+		ifsta->auto_bssid_sel = 1;
+
+		/* STA/IBSS interfaces use the master device's AP data as
+		 * their BSS. */
+		msdata = IEEE80211_DEV_TO_SUB_IF(sdata->local->mdev);
+		sdata->bss = &msdata->u.ap;
+		break;
+	}
+	case IEEE80211_IF_TYPE_MNTR:
+		dev->type = ARPHRD_IEEE80211_RADIOTAP;
+		break;
+	default:
+		/* The message must end in a newline so that it is emitted as
+		 * a complete log line and does not run into the next one. */
+		printk(KERN_WARNING "%s: %s: Unknown interface type 0x%x\n",
+		       dev->name, __FUNCTION__, type);
+	}
+	ieee80211_debugfs_change_if_type(sdata, oldtype);
+	ieee80211_update_default_wep_only(local);
+}
+
+/*
+ * Return a virtual interface to its freshly initialized state: purge the
+ * fragment table, free the default keys, release the type-specific
+ * resources held in sdata->u, flush the stations bound to the interface,
+ * then zero sdata->u and re-run ieee80211_if_sdata_init().
+ *
+ * For an AP interface this also deletes every other virtual interface
+ * whose bss pointer refers to this interface's AP data.
+ *
+ * Must be called with rtnl lock held.
+ */
+void ieee80211_if_reinit(struct net_device *dev)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct sta_info *sta;
+ int i;
+
+ ASSERT_RTNL();
+ ieee80211_if_sdata_deinit(sdata);
+ /* Free all default keys of this interface. */
+ for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+ if (!sdata->keys[i])
+ continue;
+#if 0
+ /* The interface is down at the moment, so there is not
+ * really much point in disabling the keys at this point. */
+ /* NOTE(review): this disabled code refers to addr and local->keys,
+ * neither of which is declared in the visible code - it would not
+ * compile if enabled as is. */
+ memset(addr, 0xff, ETH_ALEN);
+ if (local->ops->set_key)
+ local->ops->set_key(local_to_hw(local), DISABLE_KEY, addr,
+ local->keys[i], 0);
+#endif
+ ieee80211_key_free(sdata->keys[i]);
+ sdata->keys[i] = NULL;
+ }
+
+ switch (sdata->type) {
+ case IEEE80211_IF_TYPE_AP: {
+ /* Remove all virtual interfaces that use this BSS
+ * as their sdata->bss */
+ struct ieee80211_sub_if_data *tsdata, *n;
+ LIST_HEAD(tmp_list);
+
+ /* Dependent interfaces are first moved to a private list under
+ * the lock and deleted only after the lock has been dropped. */
+ write_lock_bh(&local->sub_if_lock);
+ list_for_each_entry_safe(tsdata, n, &local->sub_if_list, list) {
+ if (tsdata != sdata && tsdata->bss == &sdata->u.ap) {
+ printk(KERN_DEBUG "%s: removing virtual "
+ "interface %s because its BSS interface"
+ " is being removed\n",
+ sdata->dev->name, tsdata->dev->name);
+ list_move_tail(&tsdata->list, &tmp_list);
+ }
+ }
+ write_unlock_bh(&local->sub_if_lock);
+
+ list_for_each_entry_safe(tsdata, n, &tmp_list, list)
+ __ieee80211_if_del(local, tsdata);
+
+ /* The pointers are not reset here; the memset() of sdata->u at
+ * the end of the function clears them. */
+ kfree(sdata->u.ap.beacon_head);
+ kfree(sdata->u.ap.beacon_tail);
+ kfree(sdata->u.ap.generic_elem);
+
+ /* Drop the buffered broadcast/multicast frames and keep the
+ * global buffered-frame count in sync (skipped for the master
+ * device). */
+ if (dev != local->mdev) {
+ struct sk_buff *skb;
+ while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
+ local->total_ps_buffered--;
+ dev_kfree_skb(skb);
+ }
+ }
+
+ break;
+ }
+ case IEEE80211_IF_TYPE_WDS:
+ /* Release the station entry of the WDS peer, if it still exists. */
+ sta = sta_info_get(local, sdata->u.wds.remote_addr);
+ if (sta) {
+ sta_info_put(sta);
+ sta_info_free(sta, 0);
+ } else {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+ printk(KERN_DEBUG "%s: Someone had deleted my STA "
+ "entry for the WDS link\n", dev->name);
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+ }
+ break;
+ case IEEE80211_IF_TYPE_STA:
+ case IEEE80211_IF_TYPE_IBSS:
+ /* Free the stored IEs and the ProbeResp template. */
+ kfree(sdata->u.sta.extra_ie);
+ sdata->u.sta.extra_ie = NULL;
+ kfree(sdata->u.sta.assocreq_ies);
+ sdata->u.sta.assocreq_ies = NULL;
+ kfree(sdata->u.sta.assocresp_ies);
+ sdata->u.sta.assocresp_ies = NULL;
+ if (sdata->u.sta.probe_resp) {
+ dev_kfree_skb(sdata->u.sta.probe_resp);
+ sdata->u.sta.probe_resp = NULL;
+ }
+
+ break;
+ case IEEE80211_IF_TYPE_MNTR:
+ /* Undo the radiotap device type set by ieee80211_if_set_type(). */
+ dev->type = ARPHRD_ETHER;
+ break;
+ }
+
+ /* remove all STAs that are bound to this virtual interface */
+ sta_info_flush(local, dev);
+
+ memset(&sdata->u, 0, sizeof(sdata->u));
+ ieee80211_if_sdata_init(sdata);
+}
+
+/*
+ * Remove the debugfs entries of a virtual interface and unregister its
+ * net_device. The caller is responsible for unlinking @sdata from
+ * local->sub_if_list.
+ *
+ * Must be called with rtnl lock held.
+ */
+void __ieee80211_if_del(struct ieee80211_local *local,
+			struct ieee80211_sub_if_data *sdata)
+{
+	struct net_device *netdev;
+
+	netdev = sdata->dev;
+	ieee80211_debugfs_remove_netdev(sdata);
+	/* Except for the master interface, the net_device is freed later
+	 * by net_device->destructor (i.e. ieee80211_if_free). */
+	unregister_netdevice(netdev);
+}
+
+/*
+ * Delete the virtual interface called @name from the hardware that @dev
+ * belongs to. @id restricts the match to interfaces of that type; -1
+ * matches any type. The master device is never removed.
+ *
+ * Returns 0 if an interface was removed and -ENODEV if none matched.
+ *
+ * Must be called with rtnl lock held.
+ */
+int ieee80211_if_remove(struct net_device *dev, const char *name, int id)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata, *n;
+
+	ASSERT_RTNL();
+
+	write_lock_bh(&local->sub_if_lock);
+	list_for_each_entry_safe(sdata, n, &local->sub_if_list, list) {
+		if (sdata->type != id && id != -1)
+			continue;
+		if (strcmp(name, sdata->dev->name) != 0)
+			continue;
+		if (sdata->dev == local->mdev)
+			continue;
+
+		/* Unlink under the lock, delete after dropping it. */
+		list_del(&sdata->list);
+		write_unlock_bh(&local->sub_if_lock);
+		__ieee80211_if_del(local, sdata);
+		ieee80211_update_default_wep_only(local);
+		return 0;
+	}
+	write_unlock_bh(&local->sub_if_lock);
+	return -ENODEV;
+}
+
+/*
+ * Release a virtual interface: purge its fragment table and free the
+ * net_device.
+ */
+void ieee80211_if_free(struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	/* The management interface may only be freed after local->apdev
+	 * has been reset to NULL. */
+	BUG_ON(local->apdev == dev);
+	ieee80211_if_sdata_deinit(sdata);
+	free_netdev(dev);
+}
diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
new file mode 100644
index 000000000000..352f03bd8a3a
--- /dev/null
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -0,0 +1,1822 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005-2006, Devicescape Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/wireless.h>
+#include <net/iw_handler.h>
+#include <asm/uaccess.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "hostapd_ioctl.h"
+#include "ieee80211_rate.h"
+#include "wpa.h"
+#include "aes_ccm.h"
+#include "debugfs_key.h"
+
+/*
+ * Regulatory domain code, fixed at module load time (read-only parameter).
+ * The default 0x10 is FCC; 0x40 (decimal 64) selects the MKK channel table.
+ */
+static int ieee80211_regdom = 0x10;
+module_param(ieee80211_regdom, int, 0444);
+MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain; 64=MKK");
+
+/*
+ * If firmware is upgraded by the vendor, additional channels can be used based
+ * on the new Japanese regulatory rules. This is indicated by setting the
+ * ieee80211_japan_5ghz module parameter to one when loading the 80211 kernel
+ * module.  The parameter defaults to zero.
+ */
+static int ieee80211_japan_5ghz;
+module_param(ieee80211_japan_5ghz, int, 0444);
+MODULE_PARM_DESC(ieee80211_japan_5ghz, "Vendor-updated firmware for 5 GHz");
+
+/*
+ * Offer @key to the low-level driver for hardware encryption.
+ *
+ * The key is first marked for software encryption.  If the driver provides
+ * a set_key operation and accepts the key, force_sw_encrypt and hw_key_idx
+ * are taken from the key configuration the driver filled in; if the driver
+ * rejects it, the key stays software-only with an invalid hardware index.
+ * A NULL @key is a no-op.
+ */
+static void ieee80211_set_hw_encryption(struct net_device *dev,
+					struct sta_info *sta, u8 addr[ETH_ALEN],
+					struct ieee80211_key *key)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_key_conf *conf;
+
+	if (!key)
+		return;
+
+	/* default to sw encryption; this will be cleared by low-level
+	 * driver if the hw supports requested encryption */
+	key->force_sw_encrypt = 1;
+
+	if (!local->ops->set_key)
+		return;
+
+	conf = ieee80211_key_data2conf(local, key);
+	if (!conf)
+		return;
+
+	if (local->ops->set_key(local_to_hw(local), SET_KEY, addr,
+				conf, sta ? sta->aid : 0)) {
+		key->force_sw_encrypt = 1;
+		key->hw_key_idx = HW_KEY_IDX_INVALID;
+	} else {
+		key->force_sw_encrypt =
+			!!(conf->flags & IEEE80211_KEY_FORCE_SW_ENCRYPT);
+		key->hw_key_idx = conf->hw_key_idx;
+	}
+
+	kfree(conf);
+}
+
+
+/*
+ * Install, replace or remove an encryption key.
+ *
+ * @dev:        interface the key belongs to
+ * @sta_addr:   broadcast address for a default (group) key, otherwise the
+ *              address of the station owning the individual key
+ * @idx:        key index; must be 0..NUM_DEFAULT_KEYS-1 for default keys and
+ *              0 for individual keys
+ * @alg:        ALG_NONE removes the key, any other value installs a new one
+ * @set_tx_key: make the key the default TX key (ignored for individual keys)
+ * @_key:       key material, @key_len bytes
+ *
+ * Returns 0 on success, -EINVAL for an invalid index, -ENOENT when the
+ * station is unknown and -ENOMEM when allocating the key or its CCMP state
+ * fails.
+ *
+ * NOTE(review): when the low-level DISABLE_KEY call fails in the ALG_NONE
+ * path, ret is set to -EINVAL but the function still returns 0 after
+ * dropping the key.  This is kept as-is; confirm whether callers should see
+ * the error.
+ */
+static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr,
+				    int idx, int alg, int set_tx_key,
+				    const u8 *_key, size_t key_len)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	int ret = 0;
+	struct sta_info *sta;
+	struct ieee80211_key *key, *old_key;
+	int try_hwaccel = 1;
+	struct ieee80211_key_conf *keyconf;
+	struct ieee80211_sub_if_data *sdata;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (is_broadcast_ether_addr(sta_addr)) {
+		sta = NULL;
+		/* idx is signed: reject negative values as well, since it is
+		 * used to index sdata->keys[] below. */
+		if (idx < 0 || idx >= NUM_DEFAULT_KEYS) {
+			printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n",
+			       dev->name, idx);
+			return -EINVAL;
+		}
+		key = sdata->keys[idx];
+
+		/* TODO: consider adding hwaccel support for these; at least
+		 * Atheros key cache should be able to handle this since AP is
+		 * only transmitting frames with default keys. */
+		/* FIX: hw key cache can be used when only one virtual
+		 * STA is associated with each AP. If more than one STA
+		 * is associated to the same AP, software encryption
+		 * must be used. This should be done automatically
+		 * based on configured station devices. For the time
+		 * being, this can be only set at compile time. */
+	} else {
+		set_tx_key = 0;
+		if (idx != 0) {
+			printk(KERN_DEBUG "%s: set_encrypt - non-zero idx for "
+			       "individual key\n", dev->name);
+			return -EINVAL;
+		}
+
+		sta = sta_info_get(local, sta_addr);
+		if (!sta) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+			printk(KERN_DEBUG "%s: set_encrypt - unknown addr "
+			       MAC_FMT "\n",
+			       dev->name, MAC_ARG(sta_addr));
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+
+			return -ENOENT;
+		}
+
+		key = sta->key;
+	}
+
+	/* FIX:
+	 * Cannot configure default hwaccel keys with WEP algorithm, if
+	 * any of the virtual interfaces is using static WEP
+	 * configuration because hwaccel would otherwise try to decrypt
+	 * these frames.
+	 *
+	 * For now, just disable WEP hwaccel for broadcast when there is
+	 * possibility of conflict with default keys. This can maybe later be
+	 * optimized by using non-default keys (at least with Atheros ar521x).
+	 */
+	if (!sta && alg == ALG_WEP && !local->default_wep_only &&
+	    sdata->type != IEEE80211_IF_TYPE_IBSS &&
+	    sdata->type != IEEE80211_IF_TYPE_AP) {
+		try_hwaccel = 0;
+	}
+
+	if (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) {
+		/* Software encryption cannot be used with devices that hide
+		 * encryption from the host system, so always try to use
+		 * hardware acceleration with such devices. */
+		try_hwaccel = 1;
+	}
+
+	if ((local->hw.flags & IEEE80211_HW_NO_TKIP_WMM_HWACCEL) &&
+	    alg == ALG_TKIP) {
+		if (sta && (sta->flags & WLAN_STA_WME)) {
+			/* Hardware does not support hwaccel with TKIP when
+			 * using WMM. */
+			try_hwaccel = 0;
+		} else if (sdata->type == IEEE80211_IF_TYPE_STA) {
+			/* Look at the AP entry through a separate pointer so
+			 * that the caller's station (and the reference held
+			 * on it) is neither overwritten nor leaked. */
+			struct sta_info *ap_sta;
+
+			ap_sta = sta_info_get(local, sdata->u.sta.bssid);
+			if (ap_sta) {
+				if (ap_sta->flags & WLAN_STA_WME)
+					try_hwaccel = 0;
+				sta_info_put(ap_sta);
+			}
+		}
+	}
+
+	if (alg == ALG_NONE) {
+		keyconf = NULL;
+		if (try_hwaccel && key &&
+		    key->hw_key_idx != HW_KEY_IDX_INVALID &&
+		    local->ops->set_key &&
+		    (keyconf = ieee80211_key_data2conf(local, key)) != NULL &&
+		    local->ops->set_key(local_to_hw(local), DISABLE_KEY,
+					sta_addr, keyconf, sta ? sta->aid : 0)) {
+			printk(KERN_DEBUG "%s: set_encrypt - low-level disable"
+			       " failed\n", dev->name);
+			ret = -EINVAL;
+		}
+		kfree(keyconf);
+
+		if (set_tx_key || sdata->default_key == key) {
+			ieee80211_debugfs_key_remove_default(sdata);
+			sdata->default_key = NULL;
+		}
+		ieee80211_debugfs_key_remove(key);
+		if (sta)
+			sta->key = NULL;
+		else
+			sdata->keys[idx] = NULL;
+		ieee80211_key_free(key);
+		key = NULL;
+	} else {
+		old_key = key;
+		key = ieee80211_key_alloc(sta ? NULL : sdata, idx, key_len,
+					  GFP_KERNEL);
+		if (!key) {
+			ret = -ENOMEM;
+			goto err_out;
+		}
+
+		/* default to sw encryption; low-level driver sets these if the
+		 * requested encryption is supported */
+		key->hw_key_idx = HW_KEY_IDX_INVALID;
+		key->force_sw_encrypt = 1;
+
+		key->alg = alg;
+		key->keyidx = idx;
+		key->keylen = key_len;
+		memcpy(key->key, _key, key_len);
+		if (set_tx_key)
+			key->default_tx_key = 1;
+
+		if (alg == ALG_CCMP) {
+			/* Initialize AES key state here as an optimization
+			 * so that it does not need to be initialized for every
+			 * packet. */
+			key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(
+				key->key);
+			if (!key->u.ccmp.tfm) {
+				ret = -ENOMEM;
+				goto err_free;
+			}
+		}
+
+		if (set_tx_key || sdata->default_key == old_key) {
+			ieee80211_debugfs_key_remove_default(sdata);
+			sdata->default_key = NULL;
+		}
+		ieee80211_debugfs_key_remove(old_key);
+		if (sta)
+			sta->key = key;
+		else
+			sdata->keys[idx] = key;
+		ieee80211_key_free(old_key);
+		ieee80211_debugfs_key_add(local, key);
+		if (sta)
+			ieee80211_debugfs_key_sta_link(key, sta);
+
+		if (try_hwaccel &&
+		    (alg == ALG_WEP || alg == ALG_TKIP || alg == ALG_CCMP))
+			ieee80211_set_hw_encryption(dev, sta, sta_addr, key);
+	}
+
+	if (set_tx_key || (!sta && !sdata->default_key && key)) {
+		sdata->default_key = key;
+		if (key)
+			ieee80211_debugfs_key_add_default(sdata);
+
+		if (local->ops->set_key_idx &&
+		    local->ops->set_key_idx(local_to_hw(local), idx))
+			printk(KERN_DEBUG "%s: failed to set TX key idx for "
+			       "low-level driver\n", dev->name);
+	}
+
+	if (sta)
+		sta_info_put(sta);
+
+	return 0;
+
+err_free:
+	ieee80211_key_free(key);
+err_out:
+	if (sta)
+		sta_info_put(sta);
+	return ret;
+}
+
+/*
+ * SIOCSIWGENIE handler: store the generic information element data passed
+ * in @extra (@data->length bytes).
+ *
+ * STA/IBSS interfaces hand the data to ieee80211_sta_set_extra_ie(), turn
+ * off automatic BSSID selection and request authentication.  AP interfaces
+ * keep a private copy in u.ap.generic_elem and reconfigure the interface.
+ *
+ * Returns 0 or the result of the helper called, -ENOMEM when the AP copy
+ * cannot be allocated, and -EOPNOTSUPP for other interface types or when a
+ * user space MLME is in use.
+ */
+static int ieee80211_ioctl_siwgenie(struct net_device *dev,
+				    struct iw_request_info *info,
+				    struct iw_point *data, char *extra)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	if (local->user_space_mlme)
+		return -EOPNOTSUPP;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->type == IEEE80211_IF_TYPE_STA ||
+	    sdata->type == IEEE80211_IF_TYPE_IBSS) {
+		int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length);
+		if (ret)
+			return ret;
+		sdata->u.sta.auto_bssid_sel = 0;
+		ieee80211_sta_req_auth(dev, &sdata->u.sta);
+		return 0;
+	}
+
+	if (sdata->type == IEEE80211_IF_TYPE_AP) {
+		/* Allocate the replacement first: if this fails the old
+		 * element and its length stay valid and consistent instead
+		 * of leaving a NULL pointer with a stale length. */
+		void *elem = kmalloc(data->length, GFP_KERNEL);
+
+		if (!elem)
+			return -ENOMEM;
+		memcpy(elem, extra, data->length);
+
+		kfree(sdata->u.ap.generic_elem);
+		sdata->u.ap.generic_elem = elem;
+		sdata->u.ap.generic_elem_len = data->length;
+		return ieee80211_if_config(dev);
+	}
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Record the requested radio state in the hardware configuration and push
+ * the configuration to the driver.  Returns ieee80211_hw_config()'s result.
+ */
+static int ieee80211_ioctl_set_radio_enabled(struct net_device *dev,
+					     int val)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	local->hw.conf.radio_enabled = val;
+	return ieee80211_hw_config(local);
+}
+
+/*
+ * SIOCGIWNAME handler: copy a protocol name derived from the currently
+ * configured PHY mode into @name.  Always returns 0.
+ */
+static int ieee80211_ioctl_giwname(struct net_device *dev,
+				   struct iw_request_info *info,
+				   char *name, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	const char *label;
+
+	switch (local->hw.conf.phymode) {
+	case MODE_IEEE80211A:
+		label = "IEEE 802.11a";
+		break;
+	case MODE_IEEE80211B:
+		label = "IEEE 802.11b";
+		break;
+	case MODE_IEEE80211G:
+		label = "IEEE 802.11g";
+		break;
+	case MODE_ATHEROS_TURBO:
+		label = "5GHz Turbo";
+		break;
+	default:
+		/* any other mode gets the generic name */
+		label = "IEEE 802.11";
+		break;
+	}
+
+	strcpy(name, label);
+	return 0;
+}
+
+
+/*
+ * SIOCGIWRANGE handler: fill the struct iw_range in @extra with the limits
+ * and capabilities reported by this stack.  The structure is zeroed first,
+ * so every field not set below reads as 0.  Always returns 0.
+ */
+static int ieee80211_ioctl_giwrange(struct net_device *dev,
+				    struct iw_request_info *info,
+				    struct iw_point *data, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct iw_range *rng = (struct iw_range *) extra;
+
+	memset(rng, 0, sizeof(struct iw_range));
+	data->length = sizeof(struct iw_range);
+
+	/* Wireless Extensions versions */
+	rng->we_version_compiled = WIRELESS_EXT;
+	rng->we_version_source = 21;
+
+	/* retry, RTS and fragmentation limits */
+	rng->retry_capa = IW_RETRY_LIMIT;
+	rng->retry_flags = IW_RETRY_LIMIT;
+	rng->min_retry = 0;
+	rng->max_retry = 255;
+	rng->min_rts = 0;
+	rng->max_rts = 2347;
+	rng->min_frag = 256;
+	rng->max_frag = 2346;
+
+	/* supported key sizes and number of key slots */
+	rng->num_encoding_sizes = 2;
+	rng->encoding_size[0] = 5;
+	rng->encoding_size[1] = 13;
+	rng->max_encoding_tokens = NUM_DEFAULT_KEYS;
+
+	/* maximum link quality values come from the hardware description */
+	rng->max_qual.qual = local->hw.max_signal;
+	rng->max_qual.level = local->hw.max_rssi;
+	rng->max_qual.noise = local->hw.max_noise;
+	rng->max_qual.updated = local->wstats_flags;
+
+	/* average quality is reported as half of the maximum */
+	rng->avg_qual.qual = local->hw.max_signal/2;
+	rng->avg_qual.level = 0;
+	rng->avg_qual.noise = 0;
+	rng->avg_qual.updated = local->wstats_flags;
+
+	rng->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 |
+			IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP;
+
+	/* wireless events that may be delivered */
+	IW_EVENT_CAPA_SET_KERNEL(rng->event_capa);
+	IW_EVENT_CAPA_SET(rng->event_capa, SIOCGIWTHRSPY);
+	IW_EVENT_CAPA_SET(rng->event_capa, SIOCGIWAP);
+	IW_EVENT_CAPA_SET(rng->event_capa, SIOCGIWSCAN);
+
+	return 0;
+}
+
+
+/*
+ * One contiguous frequency range of a regulatory domain table.  A channel
+ * whose frequency lies within [start_freq, end_freq] inherits power_level
+ * and antenna_max from the range.  A start_freq of 0 terminates a table.
+ */
+struct ieee80211_channel_range {
+	short start_freq;		/* first frequency of the range */
+	short end_freq;			/* last frequency of the range */
+	unsigned char power_level;	/* copied to chan->power_level */
+	unsigned char antenna_max;	/* copied to chan->antenna_max */
+};
+
+/* FCC channel ranges; the all-zero entry terminates the table. */
+static const struct ieee80211_channel_range ieee80211_fcc_channels[] = {
+	/* IEEE 802.11b/g, channels 1..11 */
+	{ .start_freq = 2412, .end_freq = 2462,
+	  .power_level = 27, .antenna_max = 6 },
+	/* IEEE 802.11a, channels 36..48 */
+	{ .start_freq = 5180, .end_freq = 5240,
+	  .power_level = 17, .antenna_max = 6 },
+	/* IEEE 802.11a, channels 52..64 */
+	{ .start_freq = 5260, .end_freq = 5320,
+	  .power_level = 23, .antenna_max = 6 },
+	/* IEEE 802.11a, channels 149..165, outdoor */
+	{ .start_freq = 5745, .end_freq = 5825,
+	  .power_level = 30, .antenna_max = 6 },
+	{ .start_freq = 0 }
+};
+
+/* MKK (Japan) channel ranges; the all-zero entry terminates the table. */
+static const struct ieee80211_channel_range ieee80211_mkk_channels[] = {
+	/* IEEE 802.11b/g, channels 1..13 */
+	{ .start_freq = 2412, .end_freq = 2472,
+	  .power_level = 20, .antenna_max = 6 },
+	/* IEEE 802.11a, channels 34..48 */
+	{ .start_freq = 5170, .end_freq = 5240,
+	  .power_level = 20, .antenna_max = 6 },
+	/* IEEE 802.11a, channels 52..64 */
+	{ .start_freq = 5260, .end_freq = 5320,
+	  .power_level = 20, .antenna_max = 6 },
+	{ .start_freq = 0 }
+};
+
+
+/* Channel table currently in effect; starts out as the FCC table. */
+static const struct ieee80211_channel_range *channel_range = ieee80211_fcc_channels;
+
+
+/*
+ * Compute the usage flags of @chan from the active regulatory table.
+ *
+ * All flags are cleared first.  If the channel frequency falls into a range
+ * of channel_range[] that is allowed for the configured regulatory domain,
+ * scanning, active scanning and IBSS are enabled and the power/antenna
+ * limits of that range are copied; Japan-specific restrictions then remove
+ * individual flags again.  Only the first usable range is applied.
+ */
+static void ieee80211_unmask_channel(struct net_device *dev, int mode,
+				     struct ieee80211_channel *chan)
+{
+	const struct ieee80211_channel_range *r;
+	const int japan = (ieee80211_regdom == 64);
+	const int fcc = (ieee80211_regdom == 0x10);
+
+	chan->flag = 0;
+
+	/* Do not allow Turbo modes in Japan. */
+	if (japan &&
+	    (mode == MODE_ATHEROS_TURBO || mode == MODE_ATHEROS_TURBOG))
+		return;
+
+	for (r = channel_range; r->start_freq; r++) {
+		if (chan->freq < r->start_freq || chan->freq > r->end_freq)
+			continue;
+
+		/*
+		 * Skip new channels in Japan since the firmware was not
+		 * marked having been upgraded by the vendor.
+		 */
+		if (japan && !ieee80211_japan_5ghz &&
+		    chan->freq >= 5260 && chan->freq <= 5320)
+			continue;
+
+		/* Skip MKK channels when in FCC domain. */
+		if (fcc && (chan->freq == 5190 || chan->freq == 5210 ||
+			    chan->freq == 5230))
+			continue;
+
+		chan->flag |= IEEE80211_CHAN_W_SCAN |
+			      IEEE80211_CHAN_W_ACTIVE_SCAN |
+			      IEEE80211_CHAN_W_IBSS;
+		chan->power_level = r->power_level;
+		chan->antenna_max = r->antenna_max;
+
+		if (japan) {
+			switch (chan->freq) {
+			case 5170:
+			case 5190:
+			case 5210:
+			case 5230:
+				/*
+				 * New regulatory rules in Japan have
+				 * backwards compatibility with old channels
+				 * in 5.15-5.25 GHz band, but the station is
+				 * not allowed to use active scan on these
+				 * old channels.
+				 */
+				chan->flag &= ~IEEE80211_CHAN_W_ACTIVE_SCAN;
+				break;
+			case 5260:
+			case 5280:
+			case 5300:
+			case 5320:
+				/*
+				 * IBSS is not allowed on 5.25-5.35 GHz band
+				 * due to radar detection requirements.
+				 */
+				chan->flag &= ~IEEE80211_CHAN_W_IBSS;
+				break;
+			default:
+				break;
+			}
+		}
+
+		/* the first usable range decides; stop searching */
+		break;
+	}
+}
+
+
+/*
+ * Recompute the usage flags of every channel of every registered hardware
+ * mode.  Always returns 0.
+ */
+static int ieee80211_unmask_channels(struct net_device *dev)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hw_mode *hw_mode;
+	int idx;
+
+	list_for_each_entry(hw_mode, &local->modes_list, list)
+		for (idx = 0; idx < hw_mode->num_channels; idx++)
+			ieee80211_unmask_channel(dev, hw_mode->mode,
+						 &hw_mode->channels[idx]);
+
+	return 0;
+}
+
+
+/*
+ * Pick the regulatory channel table (MKK when ieee80211_regdom is 0x40,
+ * otherwise whatever channel_range already points to) and apply it to all
+ * channels.  Always returns 0.
+ */
+int ieee80211_init_client(struct net_device *dev)
+{
+	const int want_mkk = (ieee80211_regdom == 0x40);
+
+	if (want_mkk)
+		channel_range = ieee80211_mkk_channels;
+
+	ieee80211_unmask_channels(dev);
+	return 0;
+}
+
+
+/*
+ * SIOCSIWMODE handler: switch the interface between managed (STA), ad-hoc
+ * (IBSS) and monitor operation.
+ *
+ * Returns -EOPNOTSUPP for VLAN interfaces, -EINVAL for any other requested
+ * mode, -EBUSY when a different type is requested while the interface is
+ * running, and 0 otherwise (including when the type is already current).
+ */
+static int ieee80211_ioctl_siwmode(struct net_device *dev,
+				   struct iw_request_info *info,
+				   __u32 *mode, char *extra)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	int wanted;
+
+	if (sdata->type == IEEE80211_IF_TYPE_VLAN)
+		return -EOPNOTSUPP;
+
+	if (*mode == IW_MODE_INFRA)
+		wanted = IEEE80211_IF_TYPE_STA;
+	else if (*mode == IW_MODE_ADHOC)
+		wanted = IEEE80211_IF_TYPE_IBSS;
+	else if (*mode == IW_MODE_MONITOR)
+		wanted = IEEE80211_IF_TYPE_MNTR;
+	else
+		return -EINVAL;
+
+	/* nothing to do when the interface already has that type */
+	if (wanted == sdata->type)
+		return 0;
+
+	/* the type can only be changed while the interface is down */
+	if (netif_running(dev))
+		return -EBUSY;
+
+	ieee80211_if_reinit(dev);
+	ieee80211_if_set_type(dev, wanted);
+
+	return 0;
+}
+
+
+/*
+ * SIOCGIWMODE handler: translate the interface type into the matching
+ * Wireless Extensions mode.  Unknown types map to IW_MODE_AUTO.
+ * Always returns 0.
+ */
+static int ieee80211_ioctl_giwmode(struct net_device *dev,
+				   struct iw_request_info *info,
+				   __u32 *mode, char *extra)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	__u32 wext_mode;
+
+	switch (sdata->type) {
+	case IEEE80211_IF_TYPE_AP:
+		wext_mode = IW_MODE_MASTER;
+		break;
+	case IEEE80211_IF_TYPE_STA:
+		wext_mode = IW_MODE_INFRA;
+		break;
+	case IEEE80211_IF_TYPE_IBSS:
+		wext_mode = IW_MODE_ADHOC;
+		break;
+	case IEEE80211_IF_TYPE_MNTR:
+		wext_mode = IW_MODE_MONITOR;
+		break;
+	case IEEE80211_IF_TYPE_WDS:
+		wext_mode = IW_MODE_REPEAT;
+		break;
+	case IEEE80211_IF_TYPE_VLAN:
+		wext_mode = IW_MODE_SECOND; /* FIXME */
+		break;
+	default:
+		wext_mode = IW_MODE_AUTO;
+		break;
+	}
+
+	*mode = wext_mode;
+	return 0;
+}
+
+/*
+ * Select the operating channel by channel number or by frequency.
+ *
+ * @channel: channel number to match, or -1 when selecting by frequency
+ * @freq:    frequency to match, or -1 when selecting by channel number
+ *
+ * Only channels of enabled modes that carry IEEE80211_CHAN_W_SCAN are
+ * considered.  Returns -EINVAL when nothing matches.  Otherwise the rate
+ * control state is cleared and the result is 0 while a scan is in progress,
+ * or the return value of ieee80211_hw_config().
+ */
+int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq)
+{
+	struct ieee80211_hw_mode *hw_mode;
+	int matches = 0;
+	int idx;
+	int ret;
+
+	list_for_each_entry(hw_mode, &local->modes_list, list) {
+		if (!(local->enabled_modes & (1 << hw_mode->mode)))
+			continue;
+
+		for (idx = 0; idx < hw_mode->num_channels; idx++) {
+			struct ieee80211_channel *cand =
+				&hw_mode->channels[idx];
+
+			if (!(cand->flag & IEEE80211_CHAN_W_SCAN))
+				continue;
+			if (cand->chan != channel && cand->freq != freq)
+				continue;
+
+			/* Use next_mode as the mode preference to
+			 * resolve non-unique channel numbers. */
+			if (matches && hw_mode->mode != local->next_mode)
+				continue;
+
+			local->oper_channel = cand;
+			local->oper_hw_mode = hw_mode;
+			matches++;
+		}
+	}
+
+	if (!matches)
+		return -EINVAL;
+
+	/* while scanning, the hardware is not reconfigured here */
+	ret = local->sta_scanning ? 0 : ieee80211_hw_config(local);
+	rate_control_clear(local);
+
+	return ret;
+}
+
+/*
+ * SIOCSIWFREQ handler.
+ *
+ * With freq->e == 0, freq->m is a channel number; a negative channel
+ * number re-enables automatic channel selection on STA interfaces.
+ * Otherwise the frequency is m * 10^e and is scaled down by 10^(6 - e)
+ * before being passed to ieee80211_set_channel().
+ *
+ * Returns the result of ieee80211_set_channel(), 0 for the automatic case,
+ * or -EINVAL when the exponent is too large to scale.
+ */
+static int ieee80211_ioctl_siwfreq(struct net_device *dev,
+				   struct iw_request_info *info,
+				   struct iw_freq *freq, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	const int is_sta = (sdata->type == IEEE80211_IF_TYPE_STA);
+	int divisor = 1000000;
+	int step;
+
+	/* an explicit request turns automatic selection off ... */
+	if (is_sta)
+		sdata->u.sta.auto_channel_sel = 0;
+
+	if (freq->e == 0) {
+		if (freq->m >= 0)
+			return ieee80211_set_channel(local, freq->m, -1);
+
+		/* ... unless a negative channel number asks for it again */
+		if (is_sta)
+			sdata->u.sta.auto_channel_sel = 1;
+		return 0;
+	}
+
+	for (step = 0; step < freq->e; step++)
+		divisor /= 10;
+
+	if (divisor <= 0)
+		return -EINVAL;
+
+	return ieee80211_set_channel(local, -1, freq->m / divisor);
+}
+
+
+/*
+ * SIOCGIWFREQ handler: report the configured frequency as
+ * hw.conf.freq * 10^6.  Always returns 0.
+ */
+static int ieee80211_ioctl_giwfreq(struct net_device *dev,
+				   struct iw_request_info *info,
+				   struct iw_freq *freq, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	/* TODO: in station mode (Managed/Ad-hoc) might need to poll low-level
+	 * driver for the current channel with firmware-based management */
+
+	freq->e = 6;
+	freq->m = local->hw.conf.freq;
+
+	return 0;
+}
+
+
+/*
+ * SIOCSIWESSID handler: set the SSID of a STA, IBSS or AP interface.
+ *
+ * @data: data->length is the SSID length as passed by user space (a
+ *        trailing NUL, as sent by iwconfig, is not counted); data->flags
+ *        of zero enables automatic SSID selection on STA/IBSS interfaces
+ * @ssid: SSID bytes
+ *
+ * Returns 0 or the result of the helper called, -EINVAL when the SSID is
+ * longer than IEEE80211_MAX_SSID_LEN, and -EOPNOTSUPP for other interface
+ * types.
+ */
+static int ieee80211_ioctl_siwessid(struct net_device *dev,
+				    struct iw_request_info *info,
+				    struct iw_point *data, char *ssid)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata;
+	size_t len = data->length;
+
+	/* iwconfig uses nul termination in SSID.. */
+	if (len > 0 && ssid[len - 1] == '\0')
+		len--;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->type == IEEE80211_IF_TYPE_STA ||
+	    sdata->type == IEEE80211_IF_TYPE_IBSS) {
+		int ret;
+		if (local->user_space_mlme) {
+			if (len > IEEE80211_MAX_SSID_LEN)
+				return -EINVAL;
+			memcpy(sdata->u.sta.ssid, ssid, len);
+			sdata->u.sta.ssid_len = len;
+			return 0;
+		}
+		sdata->u.sta.auto_ssid_sel = !data->flags;
+		ret = ieee80211_sta_set_ssid(dev, ssid, len);
+		if (ret)
+			return ret;
+		ieee80211_sta_req_auth(dev, &sdata->u.sta);
+		return 0;
+	}
+
+	if (sdata->type == IEEE80211_IF_TYPE_AP) {
+		/* Reject oversized SSIDs before touching the buffer: without
+		 * this check the memcpy() overruns u.ap.ssid and the size
+		 * passed to memset() wraps around. */
+		if (len > IEEE80211_MAX_SSID_LEN)
+			return -EINVAL;
+		memcpy(sdata->u.ap.ssid, ssid, len);
+		/* zero the unused tail of the buffer */
+		memset(sdata->u.ap.ssid + len, 0,
+		       IEEE80211_MAX_SSID_LEN - len);
+		sdata->u.ap.ssid_len = len;
+		return ieee80211_if_config(dev);
+	}
+	return -EOPNOTSUPP;
+}
+
+
+/*
+ * SIOCGIWESSID handler: copy the interface's SSID into @ssid and report
+ * its length in @data.
+ *
+ * STA/IBSS interfaces return the result of ieee80211_sta_get_ssid(), with
+ * data->flags set to 1 on success and 0 on failure.  AP interfaces return
+ * at most IW_ESSID_MAX_SIZE bytes.  Other types yield -EOPNOTSUPP.
+ */
+static int ieee80211_ioctl_giwessid(struct net_device *dev,
+				    struct iw_request_info *info,
+				    struct iw_point *data, char *ssid)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	size_t ssid_len;
+
+	switch (sdata->type) {
+	case IEEE80211_IF_TYPE_STA:
+	case IEEE80211_IF_TYPE_IBSS: {
+		int err = ieee80211_sta_get_ssid(dev, ssid, &ssid_len);
+
+		if (err) {
+			data->flags = 0;
+			return err;
+		}
+		data->length = ssid_len;
+		data->flags = 1;
+		return err;
+	}
+	case IEEE80211_IF_TYPE_AP:
+		ssid_len = sdata->u.ap.ssid_len;
+		if (ssid_len > IW_ESSID_MAX_SIZE)
+			ssid_len = IW_ESSID_MAX_SIZE;
+		memcpy(ssid, sdata->u.ap.ssid, ssid_len);
+		data->length = ssid_len;
+		data->flags = 1;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+
+/*
+ * SIOCSIWAP handler.
+ *
+ * STA/IBSS: with a user space MLME the address is only stored as BSSID.
+ * Otherwise an all-zero address enables automatic BSSID and channel
+ * selection, the broadcast address enables automatic BSSID selection, and
+ * any other address disables it; the BSSID is then set and authentication
+ * is requested.
+ * WDS: the remote address is updated if it differs from the current one.
+ * Other interface types return -EOPNOTSUPP.
+ */
+static int ieee80211_ioctl_siwap(struct net_device *dev,
+				 struct iw_request_info *info,
+				 struct sockaddr *ap_addr, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	u8 *bssid = (u8 *) &ap_addr->sa_data;
+	int err;
+
+	if (sdata->type == IEEE80211_IF_TYPE_WDS) {
+		if (memcmp(sdata->u.wds.remote_addr, bssid, ETH_ALEN) == 0)
+			return 0;
+		return ieee80211_if_update_wds(dev, bssid);
+	}
+
+	if (sdata->type != IEEE80211_IF_TYPE_STA &&
+	    sdata->type != IEEE80211_IF_TYPE_IBSS)
+		return -EOPNOTSUPP;
+
+	if (local->user_space_mlme) {
+		memcpy(sdata->u.sta.bssid, bssid, ETH_ALEN);
+		return 0;
+	}
+
+	if (is_zero_ether_addr(bssid)) {
+		sdata->u.sta.auto_bssid_sel = 1;
+		sdata->u.sta.auto_channel_sel = 1;
+	} else if (is_broadcast_ether_addr(bssid)) {
+		sdata->u.sta.auto_bssid_sel = 1;
+	} else {
+		sdata->u.sta.auto_bssid_sel = 0;
+	}
+
+	err = ieee80211_sta_set_bssid(dev, bssid);
+	if (err)
+		return err;
+
+	ieee80211_sta_req_auth(dev, &sdata->u.sta);
+	return 0;
+}
+
+
+/*
+ * SIOCGIWAP handler: report the BSSID (STA/IBSS) or the remote address
+ * (WDS) as an ARPHRD_ETHER address.  Other types return -EOPNOTSUPP.
+ */
+static int ieee80211_ioctl_giwap(struct net_device *dev,
+				 struct iw_request_info *info,
+				 struct sockaddr *ap_addr, char *extra)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	const u8 *peer;
+
+	switch (sdata->type) {
+	case IEEE80211_IF_TYPE_STA:
+	case IEEE80211_IF_TYPE_IBSS:
+		peer = sdata->u.sta.bssid;
+		break;
+	case IEEE80211_IF_TYPE_WDS:
+		peer = sdata->u.wds.remote_addr;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	ap_addr->sa_family = ARPHRD_ETHER;
+	memcpy(&ap_addr->sa_data, peer, ETH_ALEN);
+	return 0;
+}
+
+
+/*
+ * SIOCSIWSCAN handler: request a scan.
+ *
+ * When IEEE80211_SCAN_MATCH_SSID is set in local->scan_flags, the scan is
+ * restricted to the SSID configured on the interface (STA, IBSS or AP);
+ * other interface types then yield -EINVAL.  Returns -ENETDOWN when the
+ * interface is not running, else the result of ieee80211_sta_req_scan().
+ */
+static int ieee80211_ioctl_siwscan(struct net_device *dev,
+				   struct iw_request_info *info,
+				   struct iw_point *data, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	u8 *match_ssid = NULL;
+	size_t match_len = 0;
+
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
+	if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
+		switch (sdata->type) {
+		case IEEE80211_IF_TYPE_STA:
+		case IEEE80211_IF_TYPE_IBSS:
+			match_ssid = sdata->u.sta.ssid;
+			match_len = sdata->u.sta.ssid_len;
+			break;
+		case IEEE80211_IF_TYPE_AP:
+			match_ssid = sdata->u.ap.ssid;
+			match_len = sdata->u.ap.ssid_len;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return ieee80211_sta_req_scan(dev, match_ssid, match_len);
+}
+
+
+/*
+ * SIOCGIWSCAN handler: copy the scan results into @extra.
+ *
+ * Returns -EAGAIN while a scan is still running.  A non-negative result of
+ * ieee80211_sta_scan_results() becomes data->length and 0 is returned; a
+ * negative result is returned as-is with data->length set to 0.
+ */
+static int ieee80211_ioctl_giwscan(struct net_device *dev,
+				   struct iw_request_info *info,
+				   struct iw_point *data, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	int copied;
+
+	if (local->sta_scanning)
+		return -EAGAIN;
+
+	copied = ieee80211_sta_scan_results(dev, extra, data->length);
+	if (copied < 0) {
+		data->length = 0;
+		return copied;
+	}
+
+	data->length = copied;
+	return 0;
+}
+
+
+/*
+ * SIOCSIWRTS handler: set the RTS threshold.
+ *
+ * Disabling stores IEEE80211_MAX_RTS_THRESHOLD.  Values outside
+ * 0..IEEE80211_MAX_RTS_THRESHOLD are rejected with -EINVAL.  The driver is
+ * informed through set_rts_threshold when it provides that operation.
+ */
+static int ieee80211_ioctl_siwrts(struct net_device *dev,
+				  struct iw_request_info *info,
+				  struct iw_param *rts, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	if (!rts->disabled &&
+	    (rts->value < 0 || rts->value > IEEE80211_MAX_RTS_THRESHOLD))
+		return -EINVAL;
+
+	if (rts->disabled)
+		local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
+	else
+		local->rts_threshold = rts->value;
+
+	/* If the wlan card performs RTS/CTS in hardware/firmware,
+	 * configure it here */
+	if (local->ops->set_rts_threshold)
+		local->ops->set_rts_threshold(local_to_hw(local),
+					      local->rts_threshold);
+
+	return 0;
+}
+
+/*
+ * SIOCGIWRTS handler: report the RTS threshold; it counts as disabled once
+ * it reaches IEEE80211_MAX_RTS_THRESHOLD.  Always returns 0.
+ */
+static int ieee80211_ioctl_giwrts(struct net_device *dev,
+				  struct iw_request_info *info,
+				  struct iw_param *rts, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	rts->fixed = 1;
+	rts->value = local->rts_threshold;
+	rts->disabled = (rts->value >= IEEE80211_MAX_RTS_THRESHOLD);
+
+	return 0;
+}
+
+
+/*
+ * SIOCSIWFRAG handler: set the fragmentation threshold.
+ *
+ * Disabling stores IEEE80211_MAX_FRAG_THRESHOLD.  Values outside
+ * 256..IEEE80211_MAX_FRAG_THRESHOLD are rejected with -EINVAL; accepted
+ * values are rounded down to an even number.  The driver is informed
+ * through set_frag_threshold when it provides that operation.
+ */
+static int ieee80211_ioctl_siwfrag(struct net_device *dev,
+				   struct iw_request_info *info,
+				   struct iw_param *frag, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	if (!frag->disabled &&
+	    (frag->value < 256 ||
+	     frag->value > IEEE80211_MAX_FRAG_THRESHOLD))
+		return -EINVAL;
+
+	if (frag->disabled) {
+		local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
+	} else {
+		/* Fragment length must be even, so strip LSB. */
+		local->fragmentation_threshold = frag->value & ~0x1;
+	}
+
+	/* If the wlan card performs fragmentation in hardware/firmware,
+	 * configure it here */
+	if (local->ops->set_frag_threshold)
+		local->ops->set_frag_threshold(
+			local_to_hw(local),
+			local->fragmentation_threshold);
+
+	return 0;
+}
+
+/*
+ * SIOCGIWFRAG handler: report the fragmentation threshold.
+ *
+ * Fragmentation counts as disabled once the threshold reaches
+ * IEEE80211_MAX_FRAG_THRESHOLD, which is the value the set handler stores
+ * when fragmentation is turned off.  Always returns 0.
+ */
+static int ieee80211_ioctl_giwfrag(struct net_device *dev,
+				   struct iw_request_info *info,
+				   struct iw_param *frag, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	frag->value = local->fragmentation_threshold;
+	/* Compare against the fragmentation limit, not the RTS limit. */
+	frag->disabled = (frag->value >= IEEE80211_MAX_FRAG_THRESHOLD);
+	frag->fixed = 1;
+
+	return 0;
+}
+
+
+/*
+ * SIOCSIWRETRY handler: set the retry limits.
+ *
+ * Only limit-type requests are accepted; anything else, or a disabled
+ * request, yields -EINVAL.  IW_RETRY_MAX selects the long retry limit,
+ * IW_RETRY_MIN the short one, and neither flag sets both.  Returns the
+ * result of the driver's set_retry_limit operation if there is one,
+ * otherwise 0.
+ */
+static int ieee80211_ioctl_siwretry(struct net_device *dev,
+				    struct iw_request_info *info,
+				    struct iw_param *retry, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	if (retry->disabled)
+		return -EINVAL;
+	if ((retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT)
+		return -EINVAL;
+
+	if (retry->flags & IW_RETRY_MAX) {
+		local->long_retry_limit = retry->value;
+	} else if (retry->flags & IW_RETRY_MIN) {
+		local->short_retry_limit = retry->value;
+	} else {
+		local->long_retry_limit = retry->value;
+		local->short_retry_limit = retry->value;
+	}
+
+	if (!local->ops->set_retry_limit)
+		return 0;
+
+	return local->ops->set_retry_limit(local_to_hw(local),
+					   local->short_retry_limit,
+					   local->long_retry_limit);
+}
+
+
+/*
+ * SIOCGIWRETRY handler: report the short retry limit (flags of 0 or
+ * IW_RETRY_MIN) or the long retry limit (IW_RETRY_MAX).  Always returns 0.
+ */
+static int ieee80211_ioctl_giwretry(struct net_device *dev,
+				    struct iw_request_info *info,
+				    struct iw_param *retry, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	const int want_min = (retry->flags == 0 ||
+			      (retry->flags & IW_RETRY_MIN));
+
+	retry->disabled = 0;
+
+	if (want_min) {
+		/* first return min value, iwconfig will ask max value
+		 * later if needed */
+		retry->flags |= IW_RETRY_LIMIT;
+		retry->value = local->short_retry_limit;
+		/* flag the value as a minimum only if the limits differ */
+		if (local->long_retry_limit != local->short_retry_limit)
+			retry->flags |= IW_RETRY_MIN;
+	} else if (retry->flags & IW_RETRY_MAX) {
+		retry->flags = IW_RETRY_LIMIT | IW_RETRY_MAX;
+		retry->value = local->long_retry_limit;
+	}
+
+	return 0;
+}
+
+/*
+ * Drop every encryption key known to this device.
+ *
+ * All default keys of all virtual interfaces and all per-station keys are
+ * freed; keys that are not forced to software encryption are first
+ * disabled in the driver.  Finally the driver is asked to remove all keys
+ * (REMOVE_ALL_KEYS); a failure of that call is only logged.
+ * Always returns 0.
+ */
+static int ieee80211_ioctl_clear_keys(struct net_device *dev)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_key_conf blank;
+	struct ieee80211_key_conf *conf;
+	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta;
+	u8 bcast[ETH_ALEN];
+	int slot;
+
+	/* default keys are addressed with the broadcast address */
+	memset(bcast, 0xff, ETH_ALEN);
+
+	read_lock(&local->sub_if_lock);
+	list_for_each_entry(sdata, &local->sub_if_list, list) {
+		for (slot = 0; slot < NUM_DEFAULT_KEYS; slot++) {
+			conf = NULL;
+			if (sdata->keys[slot] &&
+			    !sdata->keys[slot]->force_sw_encrypt &&
+			    local->ops->set_key)
+				conf = ieee80211_key_data2conf(local,
+							sdata->keys[slot]);
+			if (conf)
+				local->ops->set_key(local_to_hw(local),
+						    DISABLE_KEY, bcast,
+						    conf, 0);
+			kfree(conf);
+			ieee80211_key_free(sdata->keys[slot]);
+			sdata->keys[slot] = NULL;
+		}
+		sdata->default_key = NULL;
+	}
+	read_unlock(&local->sub_if_lock);
+
+	spin_lock_bh(&local->sta_lock);
+	list_for_each_entry(sta, &local->sta_list, list) {
+		conf = NULL;
+		if (sta->key && !sta->key->force_sw_encrypt &&
+		    local->ops->set_key)
+			conf = ieee80211_key_data2conf(local, sta->key);
+		if (conf)
+			local->ops->set_key(local_to_hw(local), DISABLE_KEY,
+					    sta->addr, conf, sta->aid);
+		kfree(conf);
+		ieee80211_key_free(sta->key);
+		sta->key = NULL;
+	}
+	spin_unlock_bh(&local->sta_lock);
+
+	/* REMOVE_ALL_KEYS is issued with a zeroed key configuration */
+	memset(&blank, 0, sizeof(blank));
+	if (local->ops->set_key &&
+	    local->ops->set_key(local_to_hw(local), REMOVE_ALL_KEYS,
+				NULL, &blank, 0))
+		printk(KERN_DEBUG "%s: failed to remove hwaccel keys\n",
+		       dev->name);
+
+	return 0;
+}
+
+
+/*
+ * Force the unicast rate of an AP interface.
+ *
+ * A @rate of 0 removes the restriction (index -1).  Otherwise @rate must
+ * match a rate of the operating hardware mode, whose index is stored.
+ * Returns -ENOENT for non-AP interfaces and -EINVAL for an unknown rate.
+ */
+static int
+ieee80211_ioctl_force_unicast_rate(struct net_device *dev,
+				   struct ieee80211_sub_if_data *sdata,
+				   int rate)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hw_mode *hw_mode;
+	int idx;
+
+	if (sdata->type != IEEE80211_IF_TYPE_AP)
+		return -ENOENT;
+
+	if (rate == 0) {
+		sdata->u.ap.force_unicast_rateidx = -1;
+		return 0;
+	}
+
+	hw_mode = local->oper_hw_mode;
+	for (idx = 0; idx < hw_mode->num_rates; idx++) {
+		if (hw_mode->rates[idx].rate != rate)
+			continue;
+		sdata->u.ap.force_unicast_rateidx = idx;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+
+/*
+ * Limit the maximum rate used by rate control on an AP interface.
+ *
+ * A @rate of 0 removes the limit (index -1).  Otherwise @rate must match a
+ * rate of the operating hardware mode, whose index is stored.
+ * Returns -ENOENT for non-AP interfaces and -EINVAL for an unknown rate.
+ */
+static int
+ieee80211_ioctl_max_ratectrl_rate(struct net_device *dev,
+				  struct ieee80211_sub_if_data *sdata,
+				  int rate)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hw_mode *hw_mode;
+	int idx;
+
+	if (sdata->type != IEEE80211_IF_TYPE_AP)
+		return -ENOENT;
+
+	if (rate == 0) {
+		sdata->u.ap.max_ratectrl_rateidx = -1;
+		return 0;
+	}
+
+	hw_mode = local->oper_hw_mode;
+	for (idx = 0; idx < hw_mode->num_rates; idx++) {
+		if (hw_mode->rates[idx].rate != rate)
+			continue;
+		sdata->u.ap.max_ratectrl_rateidx = idx;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+
+/*
+ * Try to move a software-encrypted WEP default key into the hardware.
+ *
+ * Nothing happens for a NULL key, a non-WEP key, a key that is not
+ * currently forced to software encryption, or on devices flagged
+ * IEEE80211_HW_DEVICE_HIDES_WEP.  When the driver accepts the key, the
+ * software-encryption flag and hardware key index are taken from the key
+ * configuration it filled in.
+ */
+static void ieee80211_key_enable_hwaccel(struct ieee80211_local *local,
+					 struct ieee80211_key *key)
+{
+	struct ieee80211_key_conf *conf;
+	u8 bcast[ETH_ALEN];
+
+	if (!key || key->alg != ALG_WEP || !key->force_sw_encrypt)
+		return;
+	if (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)
+		return;
+
+	memset(bcast, 0xff, ETH_ALEN);
+	conf = ieee80211_key_data2conf(local, key);
+
+	if (conf && local->ops->set_key &&
+	    !local->ops->set_key(local_to_hw(local),
+				 SET_KEY, bcast, conf, 0)) {
+		key->force_sw_encrypt =
+			!!(conf->flags & IEEE80211_KEY_FORCE_SW_ENCRYPT);
+		key->hw_key_idx = conf->hw_key_idx;
+	}
+
+	kfree(conf);
+}
+
+
+/*
+ * Take a hardware-accelerated WEP default key back to software encryption.
+ *
+ * Nothing happens for a NULL key, a non-WEP key, a key that is already
+ * forced to software encryption, or on devices flagged
+ * IEEE80211_HW_DEVICE_HIDES_WEP.  The driver is asked to disable the key
+ * when possible; the key is marked software-only in any case.
+ */
+static void ieee80211_key_disable_hwaccel(struct ieee80211_local *local,
+					  struct ieee80211_key *key)
+{
+	struct ieee80211_key_conf *conf;
+	u8 bcast[ETH_ALEN];
+
+	if (!key || key->alg != ALG_WEP || key->force_sw_encrypt)
+		return;
+	if (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)
+		return;
+
+	memset(bcast, 0xff, ETH_ALEN);
+	conf = ieee80211_key_data2conf(local, key);
+
+	if (conf && local->ops->set_key)
+		local->ops->set_key(local_to_hw(local), DISABLE_KEY,
+				    bcast, conf, 0);
+
+	kfree(conf);
+	key->force_sw_encrypt = 1;
+}
+
+
+/*
+ * Store the default_wep_only setting and walk the default keys of every
+ * virtual interface: a non-zero @value tries to enable hardware
+ * acceleration for them, zero disables it.  Always returns 0.
+ */
+static int ieee80211_ioctl_default_wep_only(struct ieee80211_local *local,
+					    int value)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int slot;
+
+	local->default_wep_only = value;
+
+	read_lock(&local->sub_if_lock);
+	list_for_each_entry(sdata, &local->sub_if_list, list) {
+		for (slot = 0; slot < NUM_DEFAULT_KEYS; slot++) {
+			struct ieee80211_key *key = sdata->keys[slot];
+
+			if (value)
+				ieee80211_key_enable_hwaccel(local, key);
+			else
+				ieee80211_key_disable_hwaccel(local, key);
+		}
+	}
+	read_unlock(&local->sub_if_lock);
+
+	return 0;
+}
+
+
+/*
+ * Re-evaluate default_wep_only from the set of virtual interfaces.
+ *
+ * The master device is ignored.  If any AP interface exists, nothing is
+ * changed.  Otherwise default_wep_only is enabled when at most one
+ * interface remains and disabled when there are several.
+ */
+void ieee80211_update_default_wep_only(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int nr_ifaces = 0;
+	int have_ap = 0;
+
+	read_lock(&local->sub_if_lock);
+	list_for_each_entry(sdata, &local->sub_if_list, list) {
+		if (sdata->dev == local->mdev)
+			continue;
+
+		/* If there is an AP interface then depend on userspace to
+		   set default_wep_only correctly. */
+		if (sdata->type == IEEE80211_IF_TYPE_AP) {
+			have_ap = 1;
+			break;
+		}
+
+		nr_ifaces++;
+	}
+	read_unlock(&local->sub_if_lock);
+
+	if (have_ap)
+		return;
+
+	ieee80211_ioctl_default_wep_only(local, nr_ifaces <= 1 ? 1 : 0);
+}
+
+
+/*
+ * Private "set parameter" handler.
+ *
+ * @extra is read as an array of two ints: extra[0] is the PRISM2_PARAM_*
+ * identifier and extra[1] is the new value.  The caller must have
+ * CAP_NET_ADMIN, otherwise -EPERM is returned.
+ *
+ * Returns 0 on success, -EINVAL when the value or the interface type is
+ * not acceptable (or when ieee80211_hw_config() fails), -EOPNOTSUPP for
+ * an unknown parameter, or whatever the delegated helper / driver
+ * callback returned.
+ */
+static int ieee80211_ioctl_prism2_param(struct net_device *dev,
+ struct iw_request_info *info,
+ void *wrqu, char *extra)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata;
+ int *i = (int *) extra;
+ int param = *i;
+ int value = *(i + 1);
+ int ret = 0;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ switch (param) {
+ case PRISM2_PARAM_IEEE_802_1X:
+ /* The driver callback is optional; without it ret stays 0 and
+ * only the per-interface flag is updated. */
+ if (local->ops->set_ieee8021x)
+ ret = local->ops->set_ieee8021x(local_to_hw(local),
+ value);
+ if (ret)
+ printk(KERN_DEBUG "%s: failed to set IEEE 802.1X (%d) "
+ "for low-level driver\n", dev->name, value);
+ else
+ sdata->ieee802_1x = value;
+ break;
+
+ case PRISM2_PARAM_ANTSEL_TX:
+ local->hw.conf.antenna_sel_tx = value;
+ if (ieee80211_hw_config(local))
+ ret = -EINVAL;
+ break;
+
+ case PRISM2_PARAM_ANTSEL_RX:
+ local->hw.conf.antenna_sel_rx = value;
+ if (ieee80211_hw_config(local))
+ ret = -EINVAL;
+ break;
+
+ case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES:
+ local->cts_protect_erp_frames = value;
+ break;
+
+ case PRISM2_PARAM_DROP_UNENCRYPTED:
+ sdata->drop_unencrypted = value;
+ break;
+
+ case PRISM2_PARAM_PREAMBLE:
+ local->short_preamble = value;
+ break;
+
+ case PRISM2_PARAM_STAT_TIME:
+ /* Arm the statistics timer on a 0 -> non-zero transition and
+ * stop it on a non-zero -> 0 transition; the first expiry is
+ * HZ * value / 100 jiffies from now.  A change between two
+ * non-zero values only updates stat_time. */
+ if (!local->stat_time && value) {
+ local->stat_timer.expires = jiffies + HZ * value / 100;
+ add_timer(&local->stat_timer);
+ } else if (local->stat_time && !value) {
+ del_timer_sync(&local->stat_timer);
+ }
+ local->stat_time = value;
+ break;
+ case PRISM2_PARAM_SHORT_SLOT_TIME:
+ if (value)
+ local->hw.conf.flags |= IEEE80211_CONF_SHORT_SLOT_TIME;
+ else
+ local->hw.conf.flags &= ~IEEE80211_CONF_SHORT_SLOT_TIME;
+ if (ieee80211_hw_config(local))
+ ret = -EINVAL;
+ break;
+
+ case PRISM2_PARAM_NEXT_MODE:
+ local->next_mode = value;
+ break;
+
+ case PRISM2_PARAM_CLEAR_KEYS:
+ ret = ieee80211_ioctl_clear_keys(dev);
+ break;
+
+ case PRISM2_PARAM_RADIO_ENABLED:
+ ret = ieee80211_ioctl_set_radio_enabled(dev, value);
+ break;
+
+ case PRISM2_PARAM_ANTENNA_MODE:
+ local->hw.conf.antenna_mode = value;
+ if (ieee80211_hw_config(local))
+ ret = -EINVAL;
+ break;
+
+ case PRISM2_PARAM_STA_ANTENNA_SEL:
+ local->sta_antenna_sel = value;
+ break;
+
+ case PRISM2_PARAM_FORCE_UNICAST_RATE:
+ ret = ieee80211_ioctl_force_unicast_rate(dev, sdata, value);
+ break;
+
+ case PRISM2_PARAM_MAX_RATECTRL_RATE:
+ ret = ieee80211_ioctl_max_ratectrl_rate(dev, sdata, value);
+ break;
+
+ case PRISM2_PARAM_RATE_CTRL_NUM_UP:
+ local->rate_ctrl_num_up = value;
+ break;
+
+ case PRISM2_PARAM_RATE_CTRL_NUM_DOWN:
+ local->rate_ctrl_num_down = value;
+ break;
+
+ case PRISM2_PARAM_TX_POWER_REDUCTION:
+ /* NOTE(review): unlike the antenna cases, this only stores the
+ * value and does not call ieee80211_hw_config() - confirm
+ * that this is intended. */
+ if (value < 0)
+ ret = -EINVAL;
+ else
+ local->hw.conf.tx_power_reduction = value;
+ break;
+
+ case PRISM2_PARAM_KEY_TX_RX_THRESHOLD:
+ local->key_tx_rx_threshold = value;
+ break;
+
+ case PRISM2_PARAM_DEFAULT_WEP_ONLY:
+ ret = ieee80211_ioctl_default_wep_only(local, value);
+ break;
+
+ case PRISM2_PARAM_WIFI_WME_NOACK_TEST:
+ local->wifi_wme_noack_test = value;
+ break;
+
+ case PRISM2_PARAM_SCAN_FLAGS:
+ local->scan_flags = value;
+ break;
+
+ case PRISM2_PARAM_MIXED_CELL:
+ /* sdata->u.sta is only touched for STA and IBSS interfaces. */
+ if (sdata->type != IEEE80211_IF_TYPE_STA &&
+ sdata->type != IEEE80211_IF_TYPE_IBSS)
+ ret = -EINVAL;
+ else
+ sdata->u.sta.mixed_cell = !!value;
+ break;
+
+ case PRISM2_PARAM_HW_MODES:
+ local->enabled_modes = value;
+ break;
+
+ case PRISM2_PARAM_CREATE_IBSS:
+ if (sdata->type != IEEE80211_IF_TYPE_IBSS)
+ ret = -EINVAL;
+ else
+ sdata->u.sta.create_ibss = !!value;
+ break;
+ case PRISM2_PARAM_WMM_ENABLED:
+ if (sdata->type != IEEE80211_IF_TYPE_STA &&
+ sdata->type != IEEE80211_IF_TYPE_IBSS)
+ ret = -EINVAL;
+ else
+ sdata->u.sta.wmm_enabled = !!value;
+ break;
+ case PRISM2_PARAM_RADAR_DETECT:
+ local->hw.conf.radar_detect = value;
+ break;
+ case PRISM2_PARAM_SPECTRUM_MGMT:
+ local->hw.conf.spect_mgmt = value;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+
+/*
+ * Private "get parameter" handler.
+ *
+ * On entry the first int in @extra holds the PRISM2_PARAM_* identifier;
+ * on success the same int is overwritten with the current value of that
+ * parameter.  Unlike the set handler, no capability check is made here.
+ *
+ * Returns 0 on success, -EINVAL when the parameter does not apply to this
+ * interface type, and -EOPNOTSUPP for parameters that cannot be read.
+ * On error *param is left unchanged.
+ */
+static int ieee80211_ioctl_get_prism2_param(struct net_device *dev,
+ struct iw_request_info *info,
+ void *wrqu, char *extra)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata;
+ int *param = (int *) extra;
+ int ret = 0;
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ switch (*param) {
+ case PRISM2_PARAM_IEEE_802_1X:
+ *param = sdata->ieee802_1x;
+ break;
+
+ case PRISM2_PARAM_ANTSEL_TX:
+ *param = local->hw.conf.antenna_sel_tx;
+ break;
+
+ case PRISM2_PARAM_ANTSEL_RX:
+ *param = local->hw.conf.antenna_sel_rx;
+ break;
+
+ case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES:
+ *param = local->cts_protect_erp_frames;
+ break;
+
+ case PRISM2_PARAM_DROP_UNENCRYPTED:
+ *param = sdata->drop_unencrypted;
+ break;
+
+ case PRISM2_PARAM_PREAMBLE:
+ *param = local->short_preamble;
+ break;
+
+ case PRISM2_PARAM_STAT_TIME:
+ *param = local->stat_time;
+ break;
+ case PRISM2_PARAM_SHORT_SLOT_TIME:
+ /* Reported as 0/1 rather than as the raw flag bit. */
+ *param = !!(local->hw.conf.flags & IEEE80211_CONF_SHORT_SLOT_TIME);
+ break;
+
+ case PRISM2_PARAM_NEXT_MODE:
+ *param = local->next_mode;
+ break;
+
+ case PRISM2_PARAM_ANTENNA_MODE:
+ *param = local->hw.conf.antenna_mode;
+ break;
+
+ case PRISM2_PARAM_STA_ANTENNA_SEL:
+ *param = local->sta_antenna_sel;
+ break;
+
+ case PRISM2_PARAM_RATE_CTRL_NUM_UP:
+ *param = local->rate_ctrl_num_up;
+ break;
+
+ case PRISM2_PARAM_RATE_CTRL_NUM_DOWN:
+ *param = local->rate_ctrl_num_down;
+ break;
+
+ case PRISM2_PARAM_TX_POWER_REDUCTION:
+ *param = local->hw.conf.tx_power_reduction;
+ break;
+
+ case PRISM2_PARAM_KEY_TX_RX_THRESHOLD:
+ *param = local->key_tx_rx_threshold;
+ break;
+
+ case PRISM2_PARAM_DEFAULT_WEP_ONLY:
+ *param = local->default_wep_only;
+ break;
+
+ case PRISM2_PARAM_WIFI_WME_NOACK_TEST:
+ *param = local->wifi_wme_noack_test;
+ break;
+
+ case PRISM2_PARAM_SCAN_FLAGS:
+ *param = local->scan_flags;
+ break;
+
+ case PRISM2_PARAM_HW_MODES:
+ *param = local->enabled_modes;
+ break;
+
+ case PRISM2_PARAM_CREATE_IBSS:
+ /* sdata->u.sta fields are only read for matching interface
+ * types. */
+ if (sdata->type != IEEE80211_IF_TYPE_IBSS)
+ ret = -EINVAL;
+ else
+ *param = !!sdata->u.sta.create_ibss;
+ break;
+
+ case PRISM2_PARAM_MIXED_CELL:
+ if (sdata->type != IEEE80211_IF_TYPE_STA &&
+ sdata->type != IEEE80211_IF_TYPE_IBSS)
+ ret = -EINVAL;
+ else
+ *param = !!sdata->u.sta.mixed_cell;
+ break;
+ case PRISM2_PARAM_WMM_ENABLED:
+ if (sdata->type != IEEE80211_IF_TYPE_STA &&
+ sdata->type != IEEE80211_IF_TYPE_IBSS)
+ ret = -EINVAL;
+ else
+ *param = !!sdata->u.sta.wmm_enabled;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * SIOCSIWMLME: request deauthentication or disassociation.
+ *
+ * Only valid on STA and IBSS interfaces (-EINVAL otherwise).  Commands
+ * other than IW_MLME_DEAUTH and IW_MLME_DISASSOC yield -EOPNOTSUPP.  The
+ * address carried in the request is currently ignored.
+ */
+static int ieee80211_ioctl_siwmlme(struct net_device *dev,
+				   struct iw_request_info *info,
+				   struct iw_point *data, char *extra)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct iw_mlme *req = (struct iw_mlme *) extra;
+	int is_client = sdata->type == IEEE80211_IF_TYPE_STA ||
+			sdata->type == IEEE80211_IF_TYPE_IBSS;
+
+	if (!is_client)
+		return -EINVAL;
+
+	/* TODO: mlme->addr.sa_data */
+	if (req->cmd == IW_MLME_DEAUTH)
+		return ieee80211_sta_deauthenticate(dev, req->reason_code);
+	if (req->cmd == IW_MLME_DISASSOC)
+		return ieee80211_sta_disassociate(dev, req->reason_code);
+
+	return -EOPNOTSUPP;
+}
+
+
+/*
+ * SIOCSIWENCODE: set, select or disable a WEP key.
+ *
+ * The key index comes from the low bits of erq->flags: 1..4 select a slot
+ * explicitly, 0 means "the current default key" (slot 0 if none matches),
+ * anything else is -EINVAL.  A request without IW_ENCODE_DISABLED and
+ * with zero key length only switches the default TX key.  Everything else
+ * is forwarded to ieee80211_set_encryption() using the broadcast address.
+ */
+static int ieee80211_ioctl_siwencode(struct net_device *dev,
+				     struct iw_request_info *info,
+				     struct iw_point *erq, char *keybuf)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	u8 bcaddr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	int disable = erq->flags & IW_ENCODE_DISABLED;
+	int idx = erq->flags & IW_ENCODE_INDEX;
+	int slot;
+
+	if (idx != 0) {
+		if (idx < 1 || idx > 4)
+			return -EINVAL;
+		idx--;
+	} else if (sdata->default_key) {
+		/* Index 0: locate the slot of the current default key. */
+		for (slot = 0; slot < NUM_DEFAULT_KEYS; slot++) {
+			if (sdata->default_key == sdata->keys[slot]) {
+				idx = slot;
+				break;
+			}
+		}
+	}
+
+	if (!disable && erq->length == 0) {
+		/* No key data - just set the default TX key index */
+		if (sdata->default_key != sdata->keys[idx]) {
+			ieee80211_debugfs_key_remove_default(sdata);
+			sdata->default_key = sdata->keys[idx];
+			if (sdata->default_key)
+				ieee80211_debugfs_key_add_default(sdata);
+		}
+		return 0;
+	}
+
+	return ieee80211_set_encryption(dev, bcaddr, idx,
+					disable ? ALG_NONE : ALG_WEP,
+					!sdata->default_key,
+					keybuf, erq->length);
+}
+
+
+/*
+ * SIOCGIWENCODE: report a configured key.
+ *
+ * An index of 1..4 in erq->flags selects the slot directly.  Any other
+ * index means the default key: slot 0 when no default key is set,
+ * otherwise the slot holding the default key (-EINVAL if it is in none of
+ * the NUM_DEFAULT_KEYS slots).
+ *
+ * On return erq->flags carries the 1-based index plus
+ * IW_ENCODE_ENABLED/IW_ENCODE_DISABLED and erq->length the stored key
+ * length.  At most the caller-supplied erq->length bytes are copied to
+ * @key.
+ */
+static int ieee80211_ioctl_giwencode(struct net_device *dev,
+				     struct iw_request_info *info,
+				     struct iw_point *erq, char *key)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	int idx = erq->flags & IW_ENCODE_INDEX;
+	int slot;
+
+	if (idx >= 1 && idx <= 4) {
+		idx--;
+	} else if (!sdata->default_key) {
+		idx = 0;
+	} else {
+		idx = -1;
+		for (slot = 0; slot < NUM_DEFAULT_KEYS; slot++) {
+			if (sdata->default_key == sdata->keys[slot]) {
+				idx = slot;
+				break;
+			}
+		}
+		if (idx < 0)
+			return -EINVAL;
+	}
+
+	erq->flags = idx + 1;
+
+	if (sdata->keys[idx]) {
+		memcpy(key, sdata->keys[idx]->key,
+		       min((int)erq->length, sdata->keys[idx]->keylen));
+		erq->length = sdata->keys[idx]->keylen;
+		erq->flags |= IW_ENCODE_ENABLED;
+	} else {
+		erq->length = 0;
+		erq->flags |= IW_ENCODE_DISABLED;
+	}
+
+	return 0;
+}
+
+/*
+ * SIOCSIWAUTH: set an authentication parameter.
+ *
+ * Several parameters are accepted and silently ignored.  Key management
+ * is stored for STA interfaces only (-EINVAL otherwise), the auth
+ * algorithm mask for STA/IBSS interfaces only (-EOPNOTSUPP otherwise),
+ * and "privacy invoked" is handed to the optional driver callback.
+ * Unknown parameters yield -EOPNOTSUPP.
+ */
+static int ieee80211_ioctl_siwauth(struct net_device *dev,
+				   struct iw_request_info *info,
+				   struct iw_param *data, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	int ret = 0;
+
+	switch (data->flags & IW_AUTH_INDEX) {
+	case IW_AUTH_WPA_VERSION:
+	case IW_AUTH_CIPHER_PAIRWISE:
+	case IW_AUTH_CIPHER_GROUP:
+	case IW_AUTH_WPA_ENABLED:
+	case IW_AUTH_RX_UNENCRYPTED_EAPOL:
+		/* accepted, nothing to do */
+		break;
+
+	case IW_AUTH_KEY_MGMT:
+		if (sdata->type != IEEE80211_IF_TYPE_STA) {
+			ret = -EINVAL;
+			break;
+		}
+		/*
+		 * TODO: sdata->u.sta.key_mgmt does not match with WE18
+		 * value completely; could consider modifying this to
+		 * be closer to WE18. For now, this value is not really
+		 * used for anything else than Privacy matching, so the
+		 * current code here should be more or less OK.
+		 */
+		if (data->value & IW_AUTH_KEY_MGMT_802_1X)
+			sdata->u.sta.key_mgmt = IEEE80211_KEY_MGMT_WPA_EAP;
+		else if (data->value & IW_AUTH_KEY_MGMT_PSK)
+			sdata->u.sta.key_mgmt = IEEE80211_KEY_MGMT_WPA_PSK;
+		else
+			sdata->u.sta.key_mgmt = IEEE80211_KEY_MGMT_NONE;
+		break;
+
+	case IW_AUTH_80211_AUTH_ALG:
+		if (sdata->type != IEEE80211_IF_TYPE_STA &&
+		    sdata->type != IEEE80211_IF_TYPE_IBSS) {
+			ret = -EOPNOTSUPP;
+			break;
+		}
+		sdata->u.sta.auth_algs = data->value;
+		break;
+
+	case IW_AUTH_PRIVACY_INVOKED:
+		if (!local->ops->set_privacy_invoked)
+			break;
+		ret = local->ops->set_privacy_invoked(local_to_hw(local),
+						      data->value);
+		break;
+
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS.
+ *
+ * The result lives in local->wstats and is therefore shared by all
+ * interfaces of the device.  For STA/IBSS interfaces the station entry
+ * matching the current BSSID supplies level/quality/noise; when no such
+ * entry exists the quality fields are zeroed and flagged invalid.
+ */
+static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct iw_statistics *wstats = &local->wstats;
+	struct sta_info *peer = NULL;
+
+	if (sdata->type == IEEE80211_IF_TYPE_STA ||
+	    sdata->type == IEEE80211_IF_TYPE_IBSS)
+		peer = sta_info_get(local, sdata->u.sta.bssid);
+
+	if (peer) {
+		wstats->qual.level = peer->last_rssi;
+		wstats->qual.qual = peer->last_signal;
+		wstats->qual.noise = peer->last_noise;
+		wstats->qual.updated = local->wstats_flags;
+		/* drop the reference taken by sta_info_get() */
+		sta_info_put(peer);
+		return wstats;
+	}
+
+	wstats->discard.fragment = 0;
+	wstats->discard.misc = 0;
+	wstats->qual.qual = 0;
+	wstats->qual.level = 0;
+	wstats->qual.noise = 0;
+	wstats->qual.updated = IW_QUAL_ALL_INVALID;
+
+	return wstats;
+}
+
+/*
+ * SIOCGIWAUTH: read back an authentication parameter.
+ *
+ * Only IW_AUTH_80211_AUTH_ALG is readable, and only on STA or IBSS
+ * interfaces; every other request returns -EOPNOTSUPP.
+ */
+static int ieee80211_ioctl_giwauth(struct net_device *dev,
+				   struct iw_request_info *info,
+				   struct iw_param *data, char *extra)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if ((data->flags & IW_AUTH_INDEX) != IW_AUTH_80211_AUTH_ALG)
+		return -EOPNOTSUPP;
+
+	if (sdata->type != IEEE80211_IF_TYPE_STA &&
+	    sdata->type != IEEE80211_IF_TYPE_IBSS)
+		return -EOPNOTSUPP;
+
+	data->value = sdata->u.sta.auth_algs;
+	return 0;
+}
+
+
+/*
+ * SIOCSIWENCODEEXT: install or remove a key described by struct
+ * iw_encode_ext.
+ *
+ * The wireless-extensions algorithm is translated to the internal ALG_*
+ * value (-EOPNOTSUPP for unknown algorithms); IW_ENCODE_DISABLED forces
+ * ALG_NONE.  The key index is resolved exactly as in the get-encode
+ * handler: 1..4 is explicit, anything else refers to the default key.
+ *
+ * NOTE(review): ext->key_len is passed on unchecked here; presumably it
+ * is validated by ieee80211_set_encryption() or by the caller - confirm.
+ */
+static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
+					struct iw_request_info *info,
+					struct iw_point *erq, char *extra)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct iw_encode_ext *ext = (struct iw_encode_ext *) extra;
+	int alg, idx, slot;
+
+	switch (ext->alg) {
+	case IW_ENCODE_ALG_NONE:
+		alg = ALG_NONE;
+		break;
+	case IW_ENCODE_ALG_WEP:
+		alg = ALG_WEP;
+		break;
+	case IW_ENCODE_ALG_TKIP:
+		alg = ALG_TKIP;
+		break;
+	case IW_ENCODE_ALG_CCMP:
+		alg = ALG_CCMP;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (erq->flags & IW_ENCODE_DISABLED)
+		alg = ALG_NONE;
+
+	idx = erq->flags & IW_ENCODE_INDEX;
+	if (idx >= 1 && idx <= 4) {
+		idx--;
+	} else if (!sdata->default_key) {
+		idx = 0;
+	} else {
+		idx = -1;
+		for (slot = 0; slot < NUM_DEFAULT_KEYS; slot++) {
+			if (sdata->default_key == sdata->keys[slot]) {
+				idx = slot;
+				break;
+			}
+		}
+		if (idx < 0)
+			return -EINVAL;
+	}
+
+	return ieee80211_set_encryption(dev, ext->addr.sa_data, idx, alg,
+					ext->ext_flags &
+					IW_ENCODE_EXT_SET_TX_KEY,
+					ext->key, ext->key_len);
+}
+
+
+/*
+ * Argument descriptions for the two private ioctls: "param" takes two
+ * fixed ints and returns nothing, "get_param" takes one fixed int and
+ * returns one fixed int.
+ */
+static const struct iw_priv_args ieee80211_ioctl_priv[] = {
+ { PRISM2_IOCTL_PRISM2_PARAM,
+ IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 2, 0, "param" },
+ { PRISM2_IOCTL_GET_PRISM2_PARAM,
+ IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
+ IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, "get_param" },
+};
+
+/* Structures to export the Wireless Handlers */
+
+/*
+ * Standard wireless-extensions handler table.  Entries are positional:
+ * the trailing comment on each line names the ioctl that slot serves, so
+ * entries must not be reordered and unused slots ("hole", unimplemented
+ * or core-handled ioctls) stay NULL to keep later entries aligned.
+ */
+static const iw_handler ieee80211_handler[] =
+{
+ (iw_handler) NULL, /* SIOCSIWCOMMIT */
+ (iw_handler) ieee80211_ioctl_giwname, /* SIOCGIWNAME */
+ (iw_handler) NULL, /* SIOCSIWNWID */
+ (iw_handler) NULL, /* SIOCGIWNWID */
+ (iw_handler) ieee80211_ioctl_siwfreq, /* SIOCSIWFREQ */
+ (iw_handler) ieee80211_ioctl_giwfreq, /* SIOCGIWFREQ */
+ (iw_handler) ieee80211_ioctl_siwmode, /* SIOCSIWMODE */
+ (iw_handler) ieee80211_ioctl_giwmode, /* SIOCGIWMODE */
+ (iw_handler) NULL, /* SIOCSIWSENS */
+ (iw_handler) NULL, /* SIOCGIWSENS */
+ (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */
+ (iw_handler) ieee80211_ioctl_giwrange, /* SIOCGIWRANGE */
+ (iw_handler) NULL /* not used */, /* SIOCSIWPRIV */
+ (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */
+ (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */
+ (iw_handler) NULL /* kernel code */, /* SIOCGIWSTATS */
+ iw_handler_set_spy, /* SIOCSIWSPY */
+ iw_handler_get_spy, /* SIOCGIWSPY */
+ iw_handler_set_thrspy, /* SIOCSIWTHRSPY */
+ iw_handler_get_thrspy, /* SIOCGIWTHRSPY */
+ (iw_handler) ieee80211_ioctl_siwap, /* SIOCSIWAP */
+ (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */
+ (iw_handler) ieee80211_ioctl_siwmlme, /* SIOCSIWMLME */
+ (iw_handler) NULL, /* SIOCGIWAPLIST */
+ (iw_handler) ieee80211_ioctl_siwscan, /* SIOCSIWSCAN */
+ (iw_handler) ieee80211_ioctl_giwscan, /* SIOCGIWSCAN */
+ (iw_handler) ieee80211_ioctl_siwessid, /* SIOCSIWESSID */
+ (iw_handler) ieee80211_ioctl_giwessid, /* SIOCGIWESSID */
+ (iw_handler) NULL, /* SIOCSIWNICKN */
+ (iw_handler) NULL, /* SIOCGIWNICKN */
+ (iw_handler) NULL, /* -- hole -- */
+ (iw_handler) NULL, /* -- hole -- */
+ (iw_handler) NULL, /* SIOCSIWRATE */
+ (iw_handler) NULL, /* SIOCGIWRATE */
+ (iw_handler) ieee80211_ioctl_siwrts, /* SIOCSIWRTS */
+ (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */
+ (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */
+ (iw_handler) ieee80211_ioctl_giwfrag, /* SIOCGIWFRAG */
+ (iw_handler) NULL, /* SIOCSIWTXPOW */
+ (iw_handler) NULL, /* SIOCGIWTXPOW */
+ (iw_handler) ieee80211_ioctl_siwretry, /* SIOCSIWRETRY */
+ (iw_handler) ieee80211_ioctl_giwretry, /* SIOCGIWRETRY */
+ (iw_handler) ieee80211_ioctl_siwencode, /* SIOCSIWENCODE */
+ (iw_handler) ieee80211_ioctl_giwencode, /* SIOCGIWENCODE */
+ (iw_handler) NULL, /* SIOCSIWPOWER */
+ (iw_handler) NULL, /* SIOCGIWPOWER */
+ (iw_handler) NULL, /* -- hole -- */
+ (iw_handler) NULL, /* -- hole -- */
+ (iw_handler) ieee80211_ioctl_siwgenie, /* SIOCSIWGENIE */
+ (iw_handler) NULL, /* SIOCGIWGENIE */
+ (iw_handler) ieee80211_ioctl_siwauth, /* SIOCSIWAUTH */
+ (iw_handler) ieee80211_ioctl_giwauth, /* SIOCGIWAUTH */
+ (iw_handler) ieee80211_ioctl_siwencodeext, /* SIOCSIWENCODEEXT */
+ (iw_handler) NULL, /* SIOCGIWENCODEEXT */
+ (iw_handler) NULL, /* SIOCSIWPMKSA */
+ (iw_handler) NULL, /* -- hole -- */
+};
+
+/*
+ * Private handler table; the index is the offset from SIOCIWFIRSTPRIV.
+ * Slot 0 sets a PRISM2 parameter, slot 1 reads one.
+ */
+static const iw_handler ieee80211_private_handler[] =
+{ /* SIOCIWFIRSTPRIV + */
+ (iw_handler) ieee80211_ioctl_prism2_param, /* 0 */
+ (iw_handler) ieee80211_ioctl_get_prism2_param, /* 1 */
+};
+
+/*
+ * Exported wireless-extensions descriptor tying together the standard
+ * handler table, the private handler table, the private argument
+ * descriptions and the statistics callback defined in this file.  The
+ * casts only strip the const qualifier from the tables.
+ */
+const struct iw_handler_def ieee80211_iw_handler_def =
+{
+ .num_standard = ARRAY_SIZE(ieee80211_handler),
+ .num_private = ARRAY_SIZE(ieee80211_private_handler),
+ .num_private_args = ARRAY_SIZE(ieee80211_ioctl_priv),
+ .standard = (iw_handler *) ieee80211_handler,
+ .private = (iw_handler *) ieee80211_private_handler,
+ .private_args = (struct iw_priv_args *) ieee80211_ioctl_priv,
+ .get_wireless_stats = ieee80211_get_wireless_stats,
+};
diff --git a/net/mac80211/ieee80211_key.h b/net/mac80211/ieee80211_key.h
new file mode 100644
index 000000000000..c33384912782
--- /dev/null
+++ b/net/mac80211/ieee80211_key.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2002-2004, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef IEEE80211_KEY_H
+#define IEEE80211_KEY_H
+
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/crypto.h>
+#include <net/mac80211.h>
+
+/* ALG_TKIP
+ * struct ieee80211_key::key is encoded as a 256-bit (32 byte) data block:
+ * Temporal Encryption Key (128 bits)
+ * Temporal Authenticator Tx MIC Key (64 bits)
+ * Temporal Authenticator Rx MIC Key (64 bits)
+ */
+
+#define WEP_IV_LEN 4
+#define WEP_ICV_LEN 4
+
+#define ALG_TKIP_KEY_LEN 32
+/* Starting offsets for each key */
+#define ALG_TKIP_TEMP_ENCR_KEY 0
+#define ALG_TKIP_TEMP_AUTH_TX_MIC_KEY 16
+#define ALG_TKIP_TEMP_AUTH_RX_MIC_KEY 24
+#define TKIP_IV_LEN 8
+#define TKIP_ICV_LEN 4
+
+#define ALG_CCMP_KEY_LEN 16
+#define CCMP_HDR_LEN 8
+#define CCMP_MIC_LEN 8
+#define CCMP_TK_LEN 16
+#define CCMP_PN_LEN 6
+
+#define NUM_RX_DATA_QUEUES 17
+
+/*
+ * One encryption key together with its per-algorithm state.
+ *
+ * The key material itself is stored in the trailing key[] array, so the
+ * structure has to be allocated with room for keylen extra bytes.  Which
+ * member of the union is meaningful depends on alg.
+ */
+struct ieee80211_key {
+ struct kref kref;
+
+ int hw_key_idx; /* filled and used by low-level driver */
+ ieee80211_key_alg alg;
+ union {
+ /* TKIP state: one transmit counter set plus one receive
+ * counter set per RX data queue. */
+ struct {
+ /* last used TSC */
+ u32 iv32;
+ u16 iv16;
+ u16 p1k[5];
+ int tx_initialized;
+
+ /* last received RSC */
+ u32 iv32_rx[NUM_RX_DATA_QUEUES];
+ u16 iv16_rx[NUM_RX_DATA_QUEUES];
+ u16 p1k_rx[NUM_RX_DATA_QUEUES][5];
+ int rx_initialized[NUM_RX_DATA_QUEUES];
+ } tkip;
+ /* CCMP state: 6-byte packet numbers (one for TX, one per RX
+ * data queue), the cipher handle and scratch space. */
+ struct {
+ u8 tx_pn[6];
+ u8 rx_pn[NUM_RX_DATA_QUEUES][6];
+ struct crypto_cipher *tfm;
+ u32 replays; /* dot11RSNAStatsCCMPReplays */
+ /* scratch buffers for virt_to_page() (crypto API) */
+#ifndef AES_BLOCK_LEN
+#define AES_BLOCK_LEN 16
+#endif
+ u8 tx_crypto_buf[6 * AES_BLOCK_LEN];
+ u8 rx_crypto_buf[6 * AES_BLOCK_LEN];
+ } ccmp;
+ } u;
+ int tx_rx_count; /* number of times this key has been used */
+ int keylen; /* number of bytes stored in key[] */
+
+ /* if the low level driver can provide hardware acceleration it should
+ * clear this flag */
+ unsigned int force_sw_encrypt:1;
+ unsigned int default_tx_key:1; /* This key is the new default TX key
+ * (used only for broadcast keys). */
+ s8 keyidx; /* WEP key index */
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ /* debugfs entries exported for this key, one per attribute */
+ struct {
+ struct dentry *stalink;
+ struct dentry *dir;
+ struct dentry *keylen;
+ struct dentry *force_sw_encrypt;
+ struct dentry *keyidx;
+ struct dentry *hw_key_idx;
+ struct dentry *tx_rx_count;
+ struct dentry *algorithm;
+ struct dentry *tx_spec;
+ struct dentry *rx_spec;
+ struct dentry *replays;
+ struct dentry *key;
+ } debugfs;
+#endif
+
+ /* key material, keylen bytes; must remain the last member */
+ u8 key[0];
+};
+
+#endif /* IEEE80211_KEY_H */
diff --git a/net/mac80211/ieee80211_led.c b/net/mac80211/ieee80211_led.c
new file mode 100644
index 000000000000..719d75b20707
--- /dev/null
+++ b/net/mac80211/ieee80211_led.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2006, Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* just for IFNAMSIZ */
+#include <linux/if.h>
+#include "ieee80211_led.h"
+
+/*
+ * Toggle the RX LED trigger.  Each call advances rx_led_counter and
+ * alternates between LED_OFF and LED_FULL; nothing happens when no RX
+ * trigger has been set up.
+ */
+void ieee80211_led_rx(struct ieee80211_local *local)
+{
+	enum led_brightness level;
+
+	if (unlikely(!local->rx_led))
+		return;
+
+	/* the counter value before the increment decides the state */
+	level = (local->rx_led_counter++ % 2 == 0) ? LED_OFF : LED_FULL;
+	led_trigger_event(local->rx_led, level);
+}
+
+/*
+ * Update the TX LED trigger.
+ *
+ * q is 1 if a packet was enqueued, 0 if it has been transmitted; the
+ * counter therefore moves up by one on enqueue and down by one on
+ * completion, and its parity selects LED_OFF (even) or LED_FULL (odd).
+ */
+void ieee80211_led_tx(struct ieee80211_local *local, int q)
+{
+	enum led_brightness level;
+
+	if (unlikely(!local->tx_led))
+		return;
+
+	/* not sure how this is supposed to work ... */
+	local->tx_led_counter += 2*q-1;
+	level = (local->tx_led_counter % 2 == 0) ? LED_OFF : LED_FULL;
+	led_trigger_event(local->tx_led, level);
+}
+
+/*
+ * Allocate and register the RX and TX LED triggers for this device.
+ *
+ * The triggers are named "<wiphy name>rx" and "<wiphy name>tx".  Both are
+ * optional and set up independently: if allocation or registration of one
+ * fails, its pointer is left NULL (which the other LED functions test
+ * for) and the other trigger is still attempted.  Previously an RX
+ * allocation failure returned early and skipped the TX trigger, unlike an
+ * RX registration failure.
+ */
+void ieee80211_led_init(struct ieee80211_local *local)
+{
+	local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
+	if (local->rx_led) {
+		snprintf(local->rx_led_name, sizeof(local->rx_led_name),
+			 "%srx", wiphy_name(local->hw.wiphy));
+		local->rx_led->name = local->rx_led_name;
+		if (led_trigger_register(local->rx_led)) {
+			kfree(local->rx_led);
+			local->rx_led = NULL;
+		}
+	}
+
+	local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
+	if (local->tx_led) {
+		snprintf(local->tx_led_name, sizeof(local->tx_led_name),
+			 "%stx", wiphy_name(local->hw.wiphy));
+		local->tx_led->name = local->tx_led_name;
+		if (led_trigger_register(local->tx_led)) {
+			kfree(local->tx_led);
+			local->tx_led = NULL;
+		}
+	}
+}
+
+/*
+ * Unregister and free whichever LED triggers were set up.
+ *
+ * The pointers are cleared after freeing: the RX/TX event functions and
+ * the LED name accessors all treat a non-NULL pointer as "trigger
+ * present", so leaving them dangling would invite a use-after-free.
+ */
+void ieee80211_led_exit(struct ieee80211_local *local)
+{
+	if (local->tx_led) {
+		led_trigger_unregister(local->tx_led);
+		kfree(local->tx_led);
+		local->tx_led = NULL;
+	}
+	if (local->rx_led) {
+		led_trigger_unregister(local->rx_led);
+		kfree(local->rx_led);
+		local->rx_led = NULL;
+	}
+}
+
+/*
+ * Return the name of the TX LED trigger, or NULL when no TX trigger is
+ * set up for this device.
+ */
+char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	return local->tx_led ? local->tx_led_name : NULL;
+}
+EXPORT_SYMBOL(__ieee80211_get_tx_led_name);
+
+/*
+ * Return the name of the RX LED trigger, or NULL when no RX trigger is
+ * set up for this device.
+ */
+char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	return local->rx_led ? local->rx_led_name : NULL;
+}
+EXPORT_SYMBOL(__ieee80211_get_rx_led_name);
diff --git a/net/mac80211/ieee80211_led.h b/net/mac80211/ieee80211_led.h
new file mode 100644
index 000000000000..5c8ab8263878
--- /dev/null
+++ b/net/mac80211/ieee80211_led.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2006, Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/leds.h>
+#include "ieee80211_i.h"
+
+/*
+ * LED helper interface.  With CONFIG_MAC80211_LEDS the functions are
+ * provided out of line; without it they collapse to empty inline stubs
+ * so callers need no #ifdefs of their own.
+ */
+#ifdef CONFIG_MAC80211_LEDS
+extern void ieee80211_led_rx(struct ieee80211_local *local);
+extern void ieee80211_led_tx(struct ieee80211_local *local, int q);
+extern void ieee80211_led_init(struct ieee80211_local *local);
+extern void ieee80211_led_exit(struct ieee80211_local *local);
+#else
+static inline void ieee80211_led_rx(struct ieee80211_local *local)
+{
+}
+static inline void ieee80211_led_tx(struct ieee80211_local *local, int q)
+{
+}
+static inline void ieee80211_led_init(struct ieee80211_local *local)
+{
+}
+static inline void ieee80211_led_exit(struct ieee80211_local *local)
+{
+}
+#endif
diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c
new file mode 100644
index 000000000000..16e850864b8a
--- /dev/null
+++ b/net/mac80211/ieee80211_rate.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include "ieee80211_rate.h"
+#include "ieee80211_i.h"
+
+/* One registered rate control algorithm, linked into rate_ctrl_algs. */
+struct rate_control_alg {
+ struct list_head list;
+ struct rate_control_ops *ops;
+};
+
+/* List of registered algorithms; accessed with rate_ctrl_mutex held. */
+static LIST_HEAD(rate_ctrl_algs);
+static DEFINE_MUTEX(rate_ctrl_mutex);
+
+/*
+ * Register a rate control algorithm so that it can later be looked up by
+ * name.  The ops structure is referenced, not copied, and must stay valid
+ * until ieee80211_rate_control_unregister() is called.
+ *
+ * Returns 0 on success or -ENOMEM if the list entry cannot be allocated.
+ */
+int ieee80211_rate_control_register(struct rate_control_ops *ops)
+{
+	struct rate_control_alg *alg;
+
+	/* kzalloc() replaces the open-coded kmalloc() + memset() pair */
+	alg = kzalloc(sizeof(*alg), GFP_KERNEL);
+	if (alg == NULL)
+		return -ENOMEM;
+	alg->ops = ops;
+
+	mutex_lock(&rate_ctrl_mutex);
+	list_add_tail(&alg->list, &rate_ctrl_algs);
+	mutex_unlock(&rate_ctrl_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(ieee80211_rate_control_register);
+
+/*
+ * Remove a rate control algorithm from the list of registered
+ * algorithms and free its list entry.
+ *
+ * The entry is freed inside the loop, only when a match was found.  When
+ * list_for_each_entry() runs to completion its cursor does not point at a
+ * real entry (it is computed from the list head), so freeing it after the
+ * loop, as was done before, passed a bogus pointer to kfree() whenever
+ * @ops had not been registered.  Breaking out right after the free is
+ * safe because the cursor is not advanced again.
+ */
+void ieee80211_rate_control_unregister(struct rate_control_ops *ops)
+{
+	struct rate_control_alg *alg;
+
+	mutex_lock(&rate_ctrl_mutex);
+	list_for_each_entry(alg, &rate_ctrl_algs, list) {
+		if (alg->ops == ops) {
+			list_del(&alg->list);
+			kfree(alg);
+			break;
+		}
+	}
+	mutex_unlock(&rate_ctrl_mutex);
+}
+EXPORT_SYMBOL(ieee80211_rate_control_unregister);
+
+/*
+ * Look up a registered algorithm and pin its module.
+ *
+ * A NULL @name matches any algorithm.  The first matching entry whose
+ * module reference can be taken is returned; NULL if there is none.
+ */
+static struct rate_control_ops *
+ieee80211_try_rate_control_ops_get(const char *name)
+{
+	struct rate_control_ops *found = NULL;
+	struct rate_control_alg *alg;
+
+	mutex_lock(&rate_ctrl_mutex);
+	list_for_each_entry(alg, &rate_ctrl_algs, list) {
+		if (name && strcmp(alg->ops->name, name))
+			continue;
+		if (!try_module_get(alg->ops->module))
+			continue;
+		found = alg->ops;
+		break;
+	}
+	mutex_unlock(&rate_ctrl_mutex);
+
+	return found;
+}
+
+/*
+ * Get the rate control algorithm.  If `name' is NULL, get the first
+ * available algorithm.
+ *
+ * When the first lookup fails, the module "rc80211_<name>" (or
+ * "rc80211_default" for a NULL name) is requested and the lookup is
+ * retried once.  Returns NULL if the algorithm is still unavailable.
+ */
+static struct rate_control_ops *
+ieee80211_rate_control_ops_get(const char *name)
+{
+	struct rate_control_ops *ops = ieee80211_try_rate_control_ops_get(name);
+
+	if (ops)
+		return ops;
+
+	request_module("rc80211_%s", name ? name : "default");
+	return ieee80211_try_rate_control_ops_get(name);
+}
+
+/* Drop the module reference taken when the ops were looked up. */
+static void ieee80211_rate_control_ops_put(struct rate_control_ops *ops)
+{
+	struct module *owner = ops->module;
+
+	module_put(owner);
+}
+
+/*
+ * Create a reference-counted rate control instance for @local.
+ *
+ * Looks up the algorithm called @name (the first available one if @name
+ * is NULL) and asks it to allocate its private data.  Returns the new
+ * reference with a count of one, or NULL if memory is short, the
+ * algorithm is unavailable or its alloc() callback fails.
+ */
+struct rate_control_ref *rate_control_alloc(const char *name,
+					    struct ieee80211_local *local)
+{
+	struct rate_control_ref *ref;
+
+	ref = kmalloc(sizeof(*ref), GFP_KERNEL);
+	if (!ref)
+		return NULL;
+
+	kref_init(&ref->kref);
+
+	ref->ops = ieee80211_rate_control_ops_get(name);
+	if (!ref->ops)
+		goto err_free_ref;
+
+	ref->priv = ref->ops->alloc(local);
+	if (!ref->priv)
+		goto err_put_ops;
+
+	return ref;
+
+err_put_ops:
+	ieee80211_rate_control_ops_put(ref->ops);
+err_free_ref:
+	kfree(ref);
+	return NULL;
+}
+
+/*
+ * kref release callback: free the algorithm's private data, drop the
+ * module reference and free the reference object itself.
+ */
+static void rate_control_release(struct kref *kref)
+{
+	struct rate_control_ref *ref =
+		container_of(kref, struct rate_control_ref, kref);
+
+	ref->ops->free(ref->priv);
+	ieee80211_rate_control_ops_put(ref->ops);
+	kfree(ref);
+}
+
+/* Take an additional reference on @ref and return it for convenience. */
+struct rate_control_ref *rate_control_get(struct rate_control_ref *ref)
+{
+	struct kref *count = &ref->kref;
+
+	kref_get(count);
+	return ref;
+}
+
+/*
+ * Drop a reference on @ref; the instance is torn down by
+ * rate_control_release() when the last reference goes away.
+ */
+void rate_control_put(struct rate_control_ref *ref)
+{
+	struct kref *count = &ref->kref;
+
+	kref_put(count, rate_control_release);
+}
diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h
new file mode 100644
index 000000000000..f021a028d9d0
--- /dev/null
+++ b/net/mac80211/ieee80211_rate.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef IEEE80211_RATE_H
+#define IEEE80211_RATE_H
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "sta_info.h"
+
+#define RATE_CONTROL_NUM_DOWN 20
+#define RATE_CONTROL_NUM_UP 15
+
+
+/*
+ * Side-band data exchanged with the get_rate() callback: the first group
+ * of fields is filled in by the algorithm, the second group is supplied
+ * by the caller.
+ */
+struct rate_control_extra {
+ /* values from rate_control_get_rate() to the caller: */
+ struct ieee80211_rate *probe; /* probe with this rate, or NULL for no
+ * probing */
+ struct ieee80211_rate *nonerp;
+
+ /* parameters from the caller to rate_control_get_rate(): */
+ struct ieee80211_hw_mode *mode;
+ int mgmt_data; /* this is data frame that is used for management
+ * (e.g., IEEE 802.1X EAPOL) */
+ u16 ethertype;
+};
+
+
+/*
+ * Operations implemented by a rate control algorithm.
+ *
+ * module and name identify the algorithm: lookups match on name and pin
+ * module while an instance exists.  "priv" is the value returned by
+ * alloc() and "priv_sta" the value returned by alloc_sta().
+ *
+ * The inline wrappers in this header call tx_status, get_rate, rate_init,
+ * clear, alloc_sta and free_sta without a NULL check.  add_sta_debugfs
+ * and remove_sta_debugfs are optional and only called when non-NULL.
+ */
+struct rate_control_ops {
+ struct module *module;
+ const char *name;
+ void (*tx_status)(void *priv, struct net_device *dev,
+ struct sk_buff *skb,
+ struct ieee80211_tx_status *status);
+ struct ieee80211_rate *(*get_rate)(void *priv, struct net_device *dev,
+ struct sk_buff *skb,
+ struct rate_control_extra *extra);
+ void (*rate_init)(void *priv, void *priv_sta,
+ struct ieee80211_local *local, struct sta_info *sta);
+ void (*clear)(void *priv);
+
+ /* per-device private data; a NULL return from alloc() is treated as
+ * failure by rate_control_alloc() */
+ void *(*alloc)(struct ieee80211_local *local);
+ void (*free)(void *priv);
+ /* per-station private data */
+ void *(*alloc_sta)(void *priv, gfp_t gfp);
+ void (*free_sta)(void *priv, void *priv_sta);
+
+ int (*add_attrs)(void *priv, struct kobject *kobj);
+ void (*remove_attrs)(void *priv, struct kobject *kobj);
+ void (*add_sta_debugfs)(void *priv, void *priv_sta,
+ struct dentry *dir);
+ void (*remove_sta_debugfs)(void *priv, void *priv_sta);
+};
+
+/*
+ * Reference-counted instance of a rate control algorithm: the algorithm's
+ * operations, the private data returned by its alloc() callback and the
+ * reference count managed by rate_control_get()/rate_control_put().
+ */
+struct rate_control_ref {
+ struct rate_control_ops *ops;
+ void *priv;
+ struct kref kref;
+};
+
+int ieee80211_rate_control_register(struct rate_control_ops *ops);
+void ieee80211_rate_control_unregister(struct rate_control_ops *ops);
+
+/* Get a reference to the rate control algorithm. If `name' is NULL, get the
+ * first available algorithm. */
+struct rate_control_ref *rate_control_alloc(const char *name,
+ struct ieee80211_local *local);
+struct rate_control_ref *rate_control_get(struct rate_control_ref *ref);
+void rate_control_put(struct rate_control_ref *ref);
+
+/* Forward a TX status report to the device's rate control algorithm. */
+static inline void rate_control_tx_status(struct ieee80211_local *local,
+					  struct net_device *dev,
+					  struct sk_buff *skb,
+					  struct ieee80211_tx_status *status)
+{
+	struct rate_control_ref *rc = local->rate_ctrl;
+
+	rc->ops->tx_status(rc->priv, dev, skb, status);
+}
+
+
+/*
+ * Ask the device's rate control algorithm which rate to use for @skb;
+ * @extra carries additional input and output, see struct
+ * rate_control_extra.
+ */
+static inline struct ieee80211_rate *
+rate_control_get_rate(struct ieee80211_local *local, struct net_device *dev,
+		      struct sk_buff *skb, struct rate_control_extra *extra)
+{
+	struct rate_control_ref *rc = local->rate_ctrl;
+
+	return rc->ops->get_rate(rc->priv, dev, skb, extra);
+}
+
+
+/*
+ * Let the station's rate control algorithm initialise its per-station
+ * state.  Note that the algorithm is taken from the station, not from
+ * @local.
+ */
+static inline void rate_control_rate_init(struct sta_info *sta,
+					  struct ieee80211_local *local)
+{
+	struct rate_control_ref *rc = sta->rate_ctrl;
+
+	rc->ops->rate_init(rc->priv, sta->rate_ctrl_priv, local, sta);
+}
+
+
+/* Invoke the clear() callback of the device's rate control algorithm. */
+static inline void rate_control_clear(struct ieee80211_local *local)
+{
+	struct rate_control_ref *rc = local->rate_ctrl;
+
+	rc->ops->clear(rc->priv);
+}
+
+/*
+ * Allocate per-station private data through the algorithm's alloc_sta()
+ * callback, using the given allocation flags.
+ */
+static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
+					   gfp_t gfp)
+{
+	void *priv_sta = ref->ops->alloc_sta(ref->priv, gfp);
+
+	return priv_sta;
+}
+
+/*
+ * Release per-station private data (@priv) through the algorithm's
+ * free_sta() callback.
+ */
+static inline void rate_control_free_sta(struct rate_control_ref *ref,
+					 void *priv)
+{
+	struct rate_control_ops *ops = ref->ops;
+
+	ops->free_sta(ref->priv, priv);
+}
+
+/*
+ * Give the rate control algorithm a chance to add its own entries to the
+ * station's debugfs directory.  Does nothing without
+ * CONFIG_MAC80211_DEBUGFS, without a station directory, or when the
+ * algorithm has no add_sta_debugfs() callback.
+ */
+static inline void rate_control_add_sta_debugfs(struct sta_info *sta)
+{
+#ifdef CONFIG_MAC80211_DEBUGFS
+	struct rate_control_ref *rc = sta->rate_ctrl;
+
+	if (!sta->debugfs.dir || !rc->ops->add_sta_debugfs)
+		return;
+
+	rc->ops->add_sta_debugfs(rc->priv, sta->rate_ctrl_priv,
+				 sta->debugfs.dir);
+#endif
+}
+
+/*
+ * Let the rate control algorithm remove its per-station debugfs entries.
+ * Does nothing without CONFIG_MAC80211_DEBUGFS or when the algorithm has
+ * no remove_sta_debugfs() callback.
+ */
+static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
+{
+#ifdef CONFIG_MAC80211_DEBUGFS
+	struct rate_control_ref *rc = sta->rate_ctrl;
+
+	if (!rc->ops->remove_sta_debugfs)
+		return;
+
+	rc->ops->remove_sta_debugfs(rc->priv, sta->rate_ctrl_priv);
+#endif
+}
+
+#endif /* IEEE80211_RATE_H */
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
new file mode 100644
index 000000000000..822917debeff
--- /dev/null
+++ b/net/mac80211/ieee80211_sta.c
@@ -0,0 +1,3060 @@
+/*
+ * BSS client mode implementation
+ * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright 2004, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* TODO:
+ * BSS table: use <BSSID,SSID> as the key to support multi-SSID APs
+ * order BSS list by RSSI(?) ("quality of AP")
+ * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE,
+ * SSID))
+ */
+#include <linux/if_ether.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/wireless.h>
+#include <linux/random.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <net/iw_handler.h>
+#include <asm/types.h>
+#include <asm/delay.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "ieee80211_rate.h"
+#include "hostapd_ioctl.h"
+
+/*
+ * Timeouts and intervals below are expressed in jiffies (fractions or
+ * multiples of HZ); the ones used in this file are added to jiffies when
+ * arming ifsta->timer or compared against timestamps with time_after().
+ */
+/* Timer delay after sending an auth frame, and number of attempts allowed. */
+#define IEEE80211_AUTH_TIMEOUT (HZ / 5)
+#define IEEE80211_AUTH_MAX_TRIES 3
+/* Timer delay after sending an assoc request, and number of attempts allowed. */
+#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
+#define IEEE80211_ASSOC_MAX_TRIES 3
+/* Period of the "is the AP still there" check while associated. */
+#define IEEE80211_MONITORING_INTERVAL (2 * HZ)
+/* Minimum spacing of routine probe requests to the current AP. */
+#define IEEE80211_PROBE_INTERVAL (60 * HZ)
+/* Timer delay applied after receiving a deauth/disassoc frame. */
+#define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ)
+#define IEEE80211_SCAN_INTERVAL (2 * HZ)
+#define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ)
+#define IEEE80211_IBSS_JOIN_TIMEOUT (20 * HZ)
+
+#define IEEE80211_PROBE_DELAY (HZ / 33)
+#define IEEE80211_CHANNEL_TIME (HZ / 33)
+#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5)
+#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
+#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
+#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
+
+#define IEEE80211_IBSS_MAX_STA_ENTRIES 128
+
+
+/*
+ * Combine a frame type and subtype into a frame control value converted with
+ * cpu_to_le16().  Both arguments are parenthesized so that expression
+ * arguments (e.g. a conditional) are OR-ed together as a whole.
+ */
+#define IEEE80211_FC(type, stype) cpu_to_le16((type) | (stype))
+
+/* Mask for bit 1; presumably tested against the ERP info element - confirm. */
+#define ERP_INFO_USE_PROTECTION BIT(1)
+
+/* Forward declarations of static helpers referenced before their definition. */
+static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
+ u8 *ssid, size_t ssid_len);
+static struct ieee80211_sta_bss *
+ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid);
+static void ieee80211_rx_bss_put(struct net_device *dev,
+ struct ieee80211_sta_bss *bss);
+static int ieee80211_sta_find_ibss(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta);
+static int ieee80211_sta_wep_configured(struct net_device *dev);
+static int ieee80211_sta_start_scan(struct net_device *dev,
+ u8 *ssid, size_t ssid_len);
+static int ieee80211_sta_config_auth(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta);
+
+
+/*
+ * Parsed Information Elements
+ *
+ * Filled in by ieee802_11_parse_elems().  Each pointer refers to the payload
+ * of the corresponding element inside the parsed buffer (the byte after the
+ * id/length header) and is NULL when the element was not seen; the matching
+ * *_len field is the payload length taken from the element header.
+ */
+struct ieee802_11_elems {
+ u8 *ssid;
+ u8 ssid_len;
+ u8 *supp_rates;
+ u8 supp_rates_len;
+ u8 *fh_params;
+ u8 fh_params_len;
+ u8 *ds_params;
+ u8 ds_params_len;
+ u8 *cf_params;
+ u8 cf_params_len;
+ u8 *tim;
+ u8 tim_len;
+ u8 *ibss_params;
+ u8 ibss_params_len;
+ u8 *challenge;
+ u8 challenge_len;
+ /* wpa, wmm_info and wmm_param payloads start with the 00:50:F2 OUI */
+ u8 *wpa;
+ u8 wpa_len;
+ u8 *rsn;
+ u8 rsn_len;
+ u8 *erp_info;
+ u8 erp_info_len;
+ u8 *ext_supp_rates;
+ u8 ext_supp_rates_len;
+ u8 *wmm_info;
+ u8 wmm_info_len;
+ u8 *wmm_param;
+ u8 wmm_param_len;
+};
+
+/*
+ * Result of ieee802_11_parse_elems():
+ * ParseOK - every element had a recognized id
+ * ParseUnknown - parsing completed, but at least one element id was unknown
+ * ParseFailed - an element's length field exceeded the remaining data
+ */
+typedef enum { ParseOK = 0, ParseUnknown = 1, ParseFailed = -1 } ParseRes;
+
+
+/*
+ * Walk the information elements stored in start[0..len) and record, in
+ * *elems, the payload pointer and payload length of every recognized element.
+ * *elems is zeroed first, so elements that are absent stay NULL/0.
+ *
+ * Returns ParseFailed as soon as an element claims more payload than is left,
+ * ParseUnknown if at least one element id was not recognized, else ParseOK.
+ */
+static ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
+				       struct ieee802_11_elems *elems)
+{
+	u8 *cur = start;
+	size_t remaining = len;
+	int num_unknown = 0;
+
+	memset(elems, 0, sizeof(*elems));
+
+	/* Each element is a one-byte id, a one-byte length, then the payload. */
+	while (remaining >= 2) {
+		u8 id = cur[0];
+		u8 elen = cur[1];
+
+		cur += 2;
+		remaining -= 2;
+
+		if (elen > remaining)
+			return ParseFailed;
+
+		switch (id) {
+		case WLAN_EID_SSID:
+			elems->ssid = cur;
+			elems->ssid_len = elen;
+			break;
+		case WLAN_EID_SUPP_RATES:
+			elems->supp_rates = cur;
+			elems->supp_rates_len = elen;
+			break;
+		case WLAN_EID_FH_PARAMS:
+			elems->fh_params = cur;
+			elems->fh_params_len = elen;
+			break;
+		case WLAN_EID_DS_PARAMS:
+			elems->ds_params = cur;
+			elems->ds_params_len = elen;
+			break;
+		case WLAN_EID_CF_PARAMS:
+			elems->cf_params = cur;
+			elems->cf_params_len = elen;
+			break;
+		case WLAN_EID_TIM:
+			elems->tim = cur;
+			elems->tim_len = elen;
+			break;
+		case WLAN_EID_IBSS_PARAMS:
+			elems->ibss_params = cur;
+			elems->ibss_params_len = elen;
+			break;
+		case WLAN_EID_CHALLENGE:
+			elems->challenge = cur;
+			elems->challenge_len = elen;
+			break;
+		case WLAN_EID_WPA:
+			/* Only the Microsoft OUI (00:50:F2) is of interest. */
+			if (elen < 4 || cur[0] != 0x00 || cur[1] != 0x50 ||
+			    cur[2] != 0xf2)
+				break;
+
+			if (cur[3] == 1) {
+				/* OUI Type 1 - WPA IE */
+				elems->wpa = cur;
+				elems->wpa_len = elen;
+			} else if (elen >= 5 && cur[3] == 2) {
+				/* OUI Type 2: subtype 0 is WMM info, 1 is WMM param */
+				if (cur[4] == 0) {
+					elems->wmm_info = cur;
+					elems->wmm_info_len = elen;
+				} else if (cur[4] == 1) {
+					elems->wmm_param = cur;
+					elems->wmm_param_len = elen;
+				}
+			}
+			break;
+		case WLAN_EID_RSN:
+			elems->rsn = cur;
+			elems->rsn_len = elen;
+			break;
+		case WLAN_EID_ERP_INFO:
+			elems->erp_info = cur;
+			elems->erp_info_len = elen;
+			break;
+		case WLAN_EID_EXT_SUPP_RATES:
+			elems->ext_supp_rates = cur;
+			elems->ext_supp_rates_len = elen;
+			break;
+		default:
+			num_unknown++;
+			break;
+		}
+
+		cur += elen;
+		remaining -= elen;
+	}
+
+	/* Do not trigger error if one byte is left over, as Apple Airport base
+	 * stations send AssocResps that are one spurious byte too long. */
+
+	if (num_unknown)
+		return ParseUnknown;
+	return ParseOK;
+}
+
+
+
+
+/*
+ * Convert an exponent-form contention window value into 2^ecw - 1.
+ * A non-positive ecw yields 0.
+ */
+static int ecw2cw(int ecw)
+{
+	int cw;
+
+	/* Double once per exponent step. */
+	for (cw = 1; ecw > 0; ecw--)
+		cw *= 2;
+
+	return cw - 1;
+}
+
+
+/*
+ * Apply the per-access-category parameters found in a WMM parameter element
+ * to the driver's TX queues through local->ops->conf_tx.
+ *
+ * wmm_param points at the element payload as stored by
+ * ieee802_11_parse_elems(), i.e. starting with the OUI; byte 5 is the
+ * version and the low nibble of byte 6 is a parameter set count.  Nothing is
+ * done if the element is shorter than 8 bytes, the version is not 1, or the
+ * count equals the one remembered in ifsta->wmm_last_param_set.
+ * local->wmm_acm is rebuilt from the ACM bit of each record.
+ */
+static void ieee80211_sta_wmm_params(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta,
+ u8 *wmm_param, size_t wmm_param_len)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_tx_queue_params params;
+ size_t left;
+ int count;
+ u8 *pos;
+
+ if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1)
+ return;
+ count = wmm_param[6] & 0x0f;
+ if (count == ifsta->wmm_last_param_set)
+ return;
+ /* remembered even if the driver has no conf_tx op (checked below) */
+ ifsta->wmm_last_param_set = count;
+
+ /* the 4-byte per-AC records start 8 bytes into the payload */
+ pos = wmm_param + 8;
+ left = wmm_param_len - 8;
+
+ memset(&params, 0, sizeof(params));
+
+ if (!local->ops->conf_tx)
+ return;
+
+ local->wmm_acm = 0;
+ for (; left >= 4; left -= 4, pos += 4) {
+ /* byte 0: bits 6:5 = ACI, bit 4 = ACM, bits 3:0 = AIFS value */
+ int aci = (pos[0] >> 5) & 0x03;
+ int acm = (pos[0] >> 4) & 0x01;
+ int queue;
+
+ /* map the ACI to a TX queue and note which wmm_acm bits need ACM */
+ switch (aci) {
+ case 1:
+ queue = IEEE80211_TX_QUEUE_DATA3;
+ if (acm) {
+ local->wmm_acm |= BIT(0) | BIT(3);
+ }
+ break;
+ case 2:
+ queue = IEEE80211_TX_QUEUE_DATA1;
+ if (acm) {
+ local->wmm_acm |= BIT(4) | BIT(5);
+ }
+ break;
+ case 3:
+ queue = IEEE80211_TX_QUEUE_DATA0;
+ if (acm) {
+ local->wmm_acm |= BIT(6) | BIT(7);
+ }
+ break;
+ case 0:
+ default:
+ queue = IEEE80211_TX_QUEUE_DATA2;
+ if (acm) {
+ local->wmm_acm |= BIT(1) | BIT(2);
+ }
+ break;
+ }
+
+ /* byte 1: high nibble = ECWmax, low nibble = ECWmin */
+ params.aifs = pos[0] & 0x0f;
+ params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4);
+ params.cw_min = ecw2cw(pos[1] & 0x0f);
+ /* TXOP is in units of 32 usec; burst_time in 0.1 ms */
+ params.burst_time = (pos[2] | (pos[3] << 8)) * 32 / 100;
+ printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d "
+ "cWmin=%d cWmax=%d burst=%d\n",
+ dev->name, queue, aci, acm, params.aifs, params.cw_min,
+ params.cw_max, params.burst_time);
+ /* TODO: handle ACM (block TX, fallback to next lowest allowed
+ * AC for now) */
+ if (local->ops->conf_tx(local_to_hw(local), queue, &params)) {
+ printk(KERN_DEBUG "%s: failed to set TX queue "
+ "parameters for queue %d\n", dev->name, queue);
+ }
+ }
+}
+
+
+/*
+ * Report the stored (re)association request/response IEs to user space as an
+ * IWEVCUSTOM wireless event of the form
+ * "ASSOCINFO(ReqIEs=<hex> RespIEs=<hex>)".
+ *
+ * If that string is longer than IW_CUSTOM_MAX, fall back to
+ * "ASSOCRESPIE=<hex>" carrying only the response IEs.  No event is sent when
+ * neither form fits or when the buffer cannot be allocated.
+ */
+static void ieee80211_sta_send_associnfo(struct net_device *dev,
+					 struct ieee80211_if_sta *ifsta)
+{
+	char *buf;
+	size_t len;
+	int i;
+	union iwreq_data wrqu;
+
+	if (!ifsta->assocreq_ies && !ifsta->assocresp_ies)
+		return;
+
+	/* Two hex digits per IE byte plus room for the fixed text and NUL. */
+	buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len +
+				ifsta->assocresp_ies_len), GFP_ATOMIC);
+	if (!buf)
+		return;
+
+	len = sprintf(buf, "ASSOCINFO(");
+	if (ifsta->assocreq_ies) {
+		len += sprintf(buf + len, "ReqIEs=");
+		for (i = 0; i < ifsta->assocreq_ies_len; i++) {
+			len += sprintf(buf + len, "%02x",
+				       ifsta->assocreq_ies[i]);
+		}
+	}
+	if (ifsta->assocresp_ies) {
+		if (ifsta->assocreq_ies)
+			len += sprintf(buf + len, " ");
+		len += sprintf(buf + len, "RespIEs=");
+		for (i = 0; i < ifsta->assocresp_ies_len; i++) {
+			len += sprintf(buf + len, "%02x",
+				       ifsta->assocresp_ies[i]);
+		}
+	}
+	len += sprintf(buf + len, ")");
+
+	/*
+	 * Too long: retry with the response IEs only.  assocresp_ies may be
+	 * NULL while assocresp_ies_len is non-zero (the copy is allocated
+	 * with GFP_ATOMIC and may fail), so the pointer must be checked.
+	 */
+	if (len > IW_CUSTOM_MAX && ifsta->assocresp_ies) {
+		len = sprintf(buf, "ASSOCRESPIE=");
+		for (i = 0; i < ifsta->assocresp_ies_len; i++) {
+			len += sprintf(buf + len, "%02x",
+				       ifsta->assocresp_ies[i]);
+		}
+	}
+
+	/* Never hand an over-long custom event to the wireless core. */
+	if (len <= IW_CUSTOM_MAX) {
+		memset(&wrqu, 0, sizeof(wrqu));
+		wrqu.data.length = len;
+		wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
+	}
+
+	kfree(buf);
+}
+
+
+/*
+ * Record a change of association state and notify user space.
+ *
+ * Does nothing if ifsta->associated already equals assoc.  When becoming
+ * associated on a STA-type interface: turn the carrier on, remember the
+ * BSSID as prev_bssid, send the association-info event and report the BSSID
+ * through a SIOCGIWAP event.  When becoming associated on any other
+ * interface type, only the flag is updated.  When losing association: turn
+ * the carrier off and report an all-zero AP address.  last_probe is reset to
+ * the current jiffies whenever a SIOCGIWAP event is sent.
+ */
+static void ieee80211_set_associated(struct net_device *dev,
+				     struct ieee80211_if_sta *ifsta, int assoc)
+{
+	union iwreq_data wrqu;
+
+	if (ifsta->associated == assoc)
+		return;
+
+	ifsta->associated = assoc;
+
+	/*
+	 * Clear the whole event payload so that no uninitialized stack bytes
+	 * are handed to wireless_send_event(); only part of ap_addr is
+	 * filled in below.
+	 */
+	memset(&wrqu, 0, sizeof(wrqu));
+
+	if (assoc) {
+		struct ieee80211_sub_if_data *sdata;
+
+		sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+		if (sdata->type != IEEE80211_IF_TYPE_STA)
+			return;
+		netif_carrier_on(dev);
+		ifsta->prev_bssid_set = 1;
+		memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN);
+		memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN);
+		ieee80211_sta_send_associnfo(dev, ifsta);
+	} else {
+		/* sa_data stays all-zero from the memset above */
+		netif_carrier_off(dev);
+	}
+	wrqu.ap_addr.sa_family = ARPHRD_ETHER;
+	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
+	ifsta->last_probe = jiffies;
+}
+
+/*
+ * Reset the association retry counter (and, if deauth is set, the
+ * authentication retry counter too), then mark the interface as not
+ * associated.
+ */
+static void ieee80211_set_disassoc(struct net_device *dev,
+				   struct ieee80211_if_sta *ifsta, int deauth)
+{
+	ifsta->assoc_tries = 0;
+	if (deauth)
+		ifsta->auth_tries = 0;
+
+	ieee80211_set_associated(dev, ifsta, 0);
+}
+
+/*
+ * Queue a locally built frame for transmission on the master device.
+ * The TX control data in skb->cb is cleared and then filled with the
+ * originating interface index, whether that interface is of the management
+ * type, and whether encryption is to be skipped (!encrypt).
+ */
+static void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
+			     int encrypt)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_tx_packet_data *tx_info =
+		(struct ieee80211_tx_packet_data *) skb->cb;
+
+	skb->dev = sdata->local->mdev;
+
+	/* All header offsets point at the start of the frame. */
+	skb_set_mac_header(skb, 0);
+	skb_set_network_header(skb, 0);
+	skb_set_transport_header(skb, 0);
+
+	memset(tx_info, 0, sizeof(struct ieee80211_tx_packet_data));
+	tx_info->ifindex = sdata->dev->ifindex;
+	tx_info->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT);
+	tx_info->do_not_encrypt = !encrypt;
+
+	dev_queue_xmit(skb);
+}
+
+
+/*
+ * Build and transmit an authentication frame addressed to ifsta->bssid.
+ *
+ * transaction is written as the auth transaction number and
+ * ifsta->auth_transaction is set to transaction + 1.  If extra is non-NULL,
+ * extra_len bytes from it are appended after the fixed fields.  A non-zero
+ * encrypt sets the Protected bit and is passed on to ieee80211_sta_tx().
+ * On allocation failure a debug message is logged and nothing is sent.
+ */
+static void ieee80211_send_auth(struct net_device *dev,
+				struct ieee80211_if_sta *ifsta,
+				int transaction, u8 *extra, size_t extra_len,
+				int encrypt)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_mgmt *auth;
+	struct sk_buff *skb;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+			    sizeof(*auth) + 6 + extra_len);
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
+		       "frame\n", dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	/* 24-byte management header followed by 6 bytes of fixed fields */
+	auth = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
+	memset(auth, 0, 24 + 6);
+
+	auth->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+					   IEEE80211_STYPE_AUTH);
+	if (encrypt)
+		auth->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+
+	memcpy(auth->da, ifsta->bssid, ETH_ALEN);
+	memcpy(auth->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(auth->bssid, ifsta->bssid, ETH_ALEN);
+
+	auth->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg);
+	auth->u.auth.auth_transaction = cpu_to_le16(transaction);
+	auth->u.auth.status_code = cpu_to_le16(0);
+
+	/* The reply to this frame is expected to carry the next number. */
+	ifsta->auth_transaction = transaction + 1;
+
+	if (extra)
+		memcpy(skb_put(skb, extra_len), extra, extra_len);
+
+	ieee80211_sta_tx(dev, skb, encrypt);
+}
+
+
+/*
+ * Start (or retry) authentication with the AP in ifsta->bssid.
+ * After IEEE80211_AUTH_MAX_TRIES attempts the state becomes
+ * IEEE80211_DISABLED; otherwise an auth frame with transaction 1 is sent and
+ * the timer is armed for IEEE80211_AUTH_TIMEOUT.
+ */
+static void ieee80211_authenticate(struct net_device *dev,
+				   struct ieee80211_if_sta *ifsta)
+{
+	if (++ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) {
+		printk(KERN_DEBUG "%s: authentication with AP " MAC_FMT
+		       " timed out\n",
+		       dev->name, MAC_ARG(ifsta->bssid));
+		ifsta->state = IEEE80211_DISABLED;
+		return;
+	}
+
+	ifsta->state = IEEE80211_AUTHENTICATE;
+	printk(KERN_DEBUG "%s: authenticate with AP " MAC_FMT "\n",
+	       dev->name, MAC_ARG(ifsta->bssid));
+
+	ieee80211_send_auth(dev, ifsta, 1, NULL, 0, 0);
+	mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
+}
+
+
+/*
+ * Build and transmit an association request to ifsta->bssid, or a
+ * reassociation request (carrying ifsta->prev_bssid as the current AP) when
+ * ifsta->prev_bssid_set is set.
+ *
+ * The frame carries, in order: the SSID element, up to 8 supported rates,
+ * an extended supported rates element for any further rates, the
+ * caller-supplied ifsta->extra_ie, and a WMM information element if both the
+ * stored BSS entry has a WMM IE and ifsta->wmm_enabled is set.  A copy of
+ * the IEs sent is kept in ifsta->assocreq_ies (replacing any earlier copy).
+ * On skb allocation failure a debug message is logged and nothing is sent.
+ */
+static void ieee80211_send_assoc(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_hw_mode *mode;
+ struct sk_buff *skb;
+ struct ieee80211_mgmt *mgmt;
+ u8 *pos, *ies;
+ int i, len;
+ u16 capab;
+ struct ieee80211_sta_bss *bss;
+ int wmm = 0;
+
+ /* 200 bytes of slack cover the fixed fields and the rate/WMM elements */
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+ sizeof(*mgmt) + 200 + ifsta->extra_ie_len +
+ ifsta->ssid_len);
+ if (!skb) {
+ printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
+ "frame\n", dev->name);
+ return;
+ }
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+
+ /* capability bits: configured value, plus mode- and BSS-dependent bits */
+ mode = local->oper_hw_mode;
+ capab = ifsta->capab;
+ if (mode->mode == MODE_IEEE80211G) {
+ capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME |
+ WLAN_CAPABILITY_SHORT_PREAMBLE;
+ }
+ bss = ieee80211_rx_bss_get(dev, ifsta->bssid);
+ if (bss) {
+ if (bss->capability & WLAN_CAPABILITY_PRIVACY)
+ capab |= WLAN_CAPABILITY_PRIVACY;
+ if (bss->wmm_ie) {
+ wmm = 1;
+ }
+ ieee80211_rx_bss_put(dev, bss);
+ }
+
+ mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+ memset(mgmt, 0, 24);
+ memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
+ memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+ memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+
+ if (ifsta->prev_bssid_set) {
+ /* reassoc fixed fields: capab (2) + listen interval (2) + current AP (6) */
+ skb_put(skb, 10);
+ mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+ IEEE80211_STYPE_REASSOC_REQ);
+ mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
+ mgmt->u.reassoc_req.listen_interval = cpu_to_le16(1);
+ memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid,
+ ETH_ALEN);
+ } else {
+ /* assoc fixed fields: capab (2) + listen interval (2) */
+ skb_put(skb, 4);
+ mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+ IEEE80211_STYPE_ASSOC_REQ);
+ mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
+ mgmt->u.assoc_req.listen_interval = cpu_to_le16(1);
+ }
+
+ /* SSID */
+ /* 'ies' marks the start of the IE area saved into assocreq_ies below */
+ ies = pos = skb_put(skb, 2 + ifsta->ssid_len);
+ *pos++ = WLAN_EID_SSID;
+ *pos++ = ifsta->ssid_len;
+ memcpy(pos, ifsta->ssid, ifsta->ssid_len);
+
+ /* supported rates: at most 8 entries, the rest go into the extended IE */
+ len = mode->num_rates;
+ if (len > 8)
+ len = 8;
+ pos = skb_put(skb, len + 2);
+ *pos++ = WLAN_EID_SUPP_RATES;
+ *pos++ = len;
+ for (i = 0; i < len; i++) {
+ int rate = mode->rates[i].rate;
+ if (mode->mode == MODE_ATHEROS_TURBO)
+ rate /= 2;
+ *pos++ = (u8) (rate / 5);
+ }
+
+ if (mode->num_rates > len) {
+ pos = skb_put(skb, mode->num_rates - len + 2);
+ *pos++ = WLAN_EID_EXT_SUPP_RATES;
+ *pos++ = mode->num_rates - len;
+ for (i = len; i < mode->num_rates; i++) {
+ int rate = mode->rates[i].rate;
+ if (mode->mode == MODE_ATHEROS_TURBO)
+ rate /= 2;
+ *pos++ = (u8) (rate / 5);
+ }
+ }
+
+ if (ifsta->extra_ie) {
+ pos = skb_put(skb, ifsta->extra_ie_len);
+ memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len);
+ }
+
+ if (wmm && ifsta->wmm_enabled) {
+ pos = skb_put(skb, 9);
+ *pos++ = WLAN_EID_VENDOR_SPECIFIC;
+ *pos++ = 7; /* len */
+ *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
+ *pos++ = 0x50;
+ *pos++ = 0xf2;
+ *pos++ = 2; /* WME */
+ *pos++ = 0; /* WME info */
+ *pos++ = 1; /* WME ver */
+ *pos++ = 0;
+ }
+
+ /* keep a copy of the IEs just built; on allocation failure the pointer
+ * is NULL while assocreq_ies_len keeps the computed length */
+ kfree(ifsta->assocreq_ies);
+ ifsta->assocreq_ies_len = (skb->data + skb->len) - ies;
+ ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_ATOMIC);
+ if (ifsta->assocreq_ies)
+ memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len);
+
+ ieee80211_sta_tx(dev, skb, 0);
+}
+
+
+/*
+ * Build and transmit a deauthentication frame with the given reason code,
+ * addressed to ifsta->bssid.  On allocation failure a debug message is
+ * logged and nothing is sent.
+ */
+static void ieee80211_send_deauth(struct net_device *dev,
+				  struct ieee80211_if_sta *ifsta, u16 reason)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_mgmt *frame;
+	struct sk_buff *skb;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*frame));
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for deauth "
+		       "frame\n", dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	/* 24-byte management header */
+	frame = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(frame, 0, 24);
+	frame->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+					    IEEE80211_STYPE_DEAUTH);
+	memcpy(frame->da, ifsta->bssid, ETH_ALEN);
+	memcpy(frame->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(frame->bssid, ifsta->bssid, ETH_ALEN);
+
+	/* two more bytes for the reason code */
+	skb_put(skb, 2);
+	frame->u.deauth.reason_code = cpu_to_le16(reason);
+
+	ieee80211_sta_tx(dev, skb, 0);
+}
+
+
+/*
+ * Build and transmit a disassociation frame with the given reason code,
+ * addressed to ifsta->bssid.  On allocation failure a debug message is
+ * logged and nothing is sent.
+ */
+static void ieee80211_send_disassoc(struct net_device *dev,
+				    struct ieee80211_if_sta *ifsta, u16 reason)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_mgmt *frame;
+	struct sk_buff *skb;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*frame));
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for disassoc "
+		       "frame\n", dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	/* 24-byte management header */
+	frame = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(frame, 0, 24);
+	frame->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+					    IEEE80211_STYPE_DISASSOC);
+	memcpy(frame->da, ifsta->bssid, ETH_ALEN);
+	memcpy(frame->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(frame->bssid, ifsta->bssid, ETH_ALEN);
+
+	/* two more bytes for the reason code */
+	skb_put(skb, 2);
+	frame->u.disassoc.reason_code = cpu_to_le16(reason);
+
+	ieee80211_sta_tx(dev, skb, 0);
+}
+
+
+/*
+ * Return 1 if the local WEP configuration disagrees with the privacy bit
+ * advertised by the BSS stored for ifsta->bssid, else 0.
+ *
+ * Always 0 when ifsta is NULL, mixed_cell is set, key management is anything
+ * other than IEEE80211_KEY_MGMT_NONE, or no BSS entry is found.
+ */
+static int ieee80211_privacy_mismatch(struct net_device *dev,
+				      struct ieee80211_if_sta *ifsta)
+{
+	struct ieee80211_sta_bss *bss;
+	int wep_in_use, bss_privacy;
+
+	if (!ifsta)
+		return 0;
+	if (ifsta->mixed_cell || ifsta->key_mgmt != IEEE80211_KEY_MGMT_NONE)
+		return 0;
+
+	bss = ieee80211_rx_bss_get(dev, ifsta->bssid);
+	if (!bss)
+		return 0;
+
+	wep_in_use = ieee80211_sta_wep_configured(dev);
+	bss_privacy = (bss->capability & WLAN_CAPABILITY_PRIVACY) ? 1 : 0;
+
+	ieee80211_rx_bss_put(dev, bss);
+
+	return wep_in_use != bss_privacy ? 1 : 0;
+}
+
+
+/*
+ * Start (or retry) association with the AP in ifsta->bssid.
+ * After IEEE80211_ASSOC_MAX_TRIES attempts, or when the privacy settings do
+ * not match the BSS, the state becomes IEEE80211_DISABLED.  Otherwise an
+ * association request is sent and the timer is armed for
+ * IEEE80211_ASSOC_TIMEOUT.
+ */
+static void ieee80211_associate(struct net_device *dev,
+				struct ieee80211_if_sta *ifsta)
+{
+	if (++ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) {
+		printk(KERN_DEBUG "%s: association with AP " MAC_FMT
+		       " timed out\n",
+		       dev->name, MAC_ARG(ifsta->bssid));
+		ifsta->state = IEEE80211_DISABLED;
+		return;
+	}
+
+	ifsta->state = IEEE80211_ASSOCIATE;
+	printk(KERN_DEBUG "%s: associate with AP " MAC_FMT "\n",
+	       dev->name, MAC_ARG(ifsta->bssid));
+
+	if (ieee80211_privacy_mismatch(dev, ifsta)) {
+		printk(KERN_DEBUG "%s: mismatch in privacy configuration and "
+		       "mixed-cell disabled - abort association\n", dev->name);
+		ifsta->state = IEEE80211_DISABLED;
+		return;
+	}
+
+	ieee80211_send_assoc(dev, ifsta);
+	mod_timer(&ifsta->timer, jiffies + IEEE80211_ASSOC_TIMEOUT);
+}
+
+
+/*
+ * Periodic work while associated: set the state to IEEE80211_ASSOCIATED and
+ * check that the current AP is still reachable.
+ *
+ * If nothing was received from the AP for IEEE80211_MONITORING_INTERVAL, a
+ * probe request is sent and probereq_poll is set; if the poll was already
+ * pending, the AP is assumed out of range and its STA entry is freed.
+ * Otherwise a routine probe request is sent every IEEE80211_PROBE_INTERVAL.
+ *
+ * When the AP is considered lost (or has no STA entry), an all-zero
+ * SIOCGIWAP event is sent and the timer is armed for
+ * IEEE80211_MONITORING_INTERVAL + 30 s; otherwise it is armed for
+ * IEEE80211_MONITORING_INTERVAL.
+ */
+static void ieee80211_associated(struct net_device *dev,
+				 struct ieee80211_if_sta *ifsta)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+	int disassoc;
+
+	/* TODO: start monitoring current AP signal quality and number of
+	 * missed beacons. Scan other channels every now and then and search
+	 * for better APs. */
+	/* TODO: remove expired BSSes */
+
+	ifsta->state = IEEE80211_ASSOCIATED;
+
+	sta = sta_info_get(local, ifsta->bssid);
+	if (!sta) {
+		printk(KERN_DEBUG "%s: No STA entry for own AP " MAC_FMT "\n",
+		       dev->name, MAC_ARG(ifsta->bssid));
+		disassoc = 1;
+	} else {
+		disassoc = 0;
+		if (time_after(jiffies,
+			       sta->last_rx + IEEE80211_MONITORING_INTERVAL)) {
+			if (ifsta->probereq_poll) {
+				/* the previous poll went unanswered */
+				printk(KERN_DEBUG "%s: No ProbeResp from "
+				       "current AP " MAC_FMT " - assume out of "
+				       "range\n",
+				       dev->name, MAC_ARG(ifsta->bssid));
+				disassoc = 1;
+				sta_info_free(sta, 0);
+				ifsta->probereq_poll = 0;
+			} else {
+				ieee80211_send_probe_req(dev, ifsta->bssid,
+							 local->scan_ssid,
+							 local->scan_ssid_len);
+				ifsta->probereq_poll = 1;
+			}
+		} else {
+			ifsta->probereq_poll = 0;
+			if (time_after(jiffies, ifsta->last_probe +
+				       IEEE80211_PROBE_INTERVAL)) {
+				ifsta->last_probe = jiffies;
+				ieee80211_send_probe_req(dev, ifsta->bssid,
+							 ifsta->ssid,
+							 ifsta->ssid_len);
+			}
+		}
+		/* drop the reference taken by sta_info_get() */
+		sta_info_put(sta);
+	}
+	if (disassoc) {
+		union iwreq_data wrqu;
+
+		/*
+		 * Clear the whole payload so that no uninitialized stack
+		 * bytes are handed to wireless_send_event(); the AP address
+		 * stays all-zero.
+		 */
+		memset(&wrqu, 0, sizeof(wrqu));
+		wrqu.ap_addr.sa_family = ARPHRD_ETHER;
+		wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
+		mod_timer(&ifsta->timer, jiffies +
+				      IEEE80211_MONITORING_INTERVAL + 30 * HZ);
+	} else {
+		mod_timer(&ifsta->timer, jiffies +
+				      IEEE80211_MONITORING_INTERVAL);
+	}
+}
+
+
+/*
+ * Build and transmit a probe request.
+ *
+ * dst is used as both destination address and BSSID; if it is NULL the
+ * broadcast address (ff:ff:ff:ff:ff:ff) is used for both.  The frame carries
+ * an SSID element with ssid/ssid_len and the rates of the operating mode
+ * that are flagged IEEE80211_RATE_SUPPORTED: the first 8 in a supported
+ * rates element, any further ones in an extended supported rates element.
+ * On allocation failure a debug message is logged and nothing is sent.
+ */
+static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
+ u8 *ssid, size_t ssid_len)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_hw_mode *mode;
+ struct sk_buff *skb;
+ struct ieee80211_mgmt *mgmt;
+ u8 *pos, *supp_rates, *esupp_rates = NULL;
+ int i;
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200);
+ if (!skb) {
+ printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
+ "request\n", dev->name);
+ return;
+ }
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+
+ mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+ memset(mgmt, 0, 24);
+ mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+ IEEE80211_STYPE_PROBE_REQ);
+ memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+ if (dst) {
+ memcpy(mgmt->da, dst, ETH_ALEN);
+ memcpy(mgmt->bssid, dst, ETH_ALEN);
+ } else {
+ memset(mgmt->da, 0xff, ETH_ALEN);
+ memset(mgmt->bssid, 0xff, ETH_ALEN);
+ }
+ pos = skb_put(skb, 2 + ssid_len);
+ *pos++ = WLAN_EID_SSID;
+ *pos++ = ssid_len;
+ memcpy(pos, ssid, ssid_len);
+
+ /* element header first; its length byte is bumped as rates are added */
+ supp_rates = skb_put(skb, 2);
+ supp_rates[0] = WLAN_EID_SUPP_RATES;
+ supp_rates[1] = 0;
+ mode = local->oper_hw_mode;
+ for (i = 0; i < mode->num_rates; i++) {
+ struct ieee80211_rate *rate = &mode->rates[i];
+ if (!(rate->flags & IEEE80211_RATE_SUPPORTED))
+ continue;
+ if (esupp_rates) {
+ /* extended element already open: append to it */
+ pos = skb_put(skb, 1);
+ esupp_rates[1]++;
+ } else if (supp_rates[1] == 8) {
+ /* ninth rate: open the extended element (header + first rate) */
+ esupp_rates = skb_put(skb, 3);
+ esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES;
+ esupp_rates[1] = 1;
+ pos = &esupp_rates[2];
+ } else {
+ pos = skb_put(skb, 1);
+ supp_rates[1]++;
+ }
+ if (mode->mode == MODE_ATHEROS_TURBO)
+ *pos = rate->rate / 10;
+ else
+ *pos = rate->rate / 5;
+ }
+
+ ieee80211_sta_tx(dev, skb, 0);
+}
+
+
+/* Return 1 if the interface has a default key whose algorithm is WEP, else 0. */
+static int ieee80211_sta_wep_configured(struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (sdata && sdata->default_key &&
+	    sdata->default_key->alg == ALG_WEP)
+		return 1;
+
+	return 0;
+}
+
+
+/* Mark the interface as authenticated and move on to association. */
+static void ieee80211_auth_completed(struct net_device *dev,
+				     struct ieee80211_if_sta *ifsta)
+{
+	ifsta->authenticated = 1;
+	printk(KERN_DEBUG "%s: authenticated\n", dev->name);
+
+	ieee80211_associate(dev, ifsta);
+}
+
+
+/*
+ * Answer a shared-key authentication challenge: locate the challenge element
+ * in the received auth frame and echo it back, including its two-byte
+ * element header, in an encrypted auth frame with transaction number 3.
+ * Gives up with a debug message if the elements cannot be parsed or no
+ * challenge element is present.
+ */
+static void ieee80211_auth_challenge(struct net_device *dev,
+				     struct ieee80211_if_sta *ifsta,
+				     struct ieee80211_mgmt *mgmt,
+				     size_t len)
+{
+	struct ieee802_11_elems elems;
+	u8 *ies = mgmt->u.auth.variable;
+	ParseRes res;
+
+	printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name);
+
+	res = ieee802_11_parse_elems(ies, len - (ies - (u8 *) mgmt), &elems);
+	if (res == ParseFailed) {
+		printk(KERN_DEBUG "%s: failed to parse Auth(challenge)\n",
+		       dev->name);
+		return;
+	}
+
+	if (!elems.challenge) {
+		printk(KERN_DEBUG "%s: no challenge IE in shared key auth "
+		       "frame\n", dev->name);
+		return;
+	}
+
+	/* elems.challenge points at the payload; back up over id and length */
+	ieee80211_send_auth(dev, ifsta, 3, elems.challenge - 2,
+			    elems.challenge_len + 2, 1);
+}
+
+
+/*
+ * Handle a received authentication frame.
+ *
+ * On non-IBSS interfaces the frame is ignored unless the state is
+ * IEEE80211_AUTHENTICATE and both SA and BSSID match ifsta->bssid.  Frames
+ * shorter than the 24-byte header plus 6 bytes of fixed fields are ignored.
+ * On IBSS interfaces an open-system transaction-1 frame is answered with a
+ * transaction-2 frame.
+ *
+ * The frame must then match the expected algorithm and transaction number.
+ * A failure status is logged and, for "algorithm not supported", the next
+ * permitted algorithm is selected for the following attempt.  On success,
+ * authentication is completed, or for shared key continued with the
+ * challenge reply.
+ */
+static void ieee80211_rx_mgmt_auth(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta,
+ struct ieee80211_mgmt *mgmt,
+ size_t len)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ u16 auth_alg, auth_transaction, status_code;
+
+ if (ifsta->state != IEEE80211_AUTHENTICATE &&
+ sdata->type != IEEE80211_IF_TYPE_IBSS) {
+ printk(KERN_DEBUG "%s: authentication frame received from "
+ MAC_FMT ", but not in authenticate state - ignored\n",
+ dev->name, MAC_ARG(mgmt->sa));
+ return;
+ }
+
+ if (len < 24 + 6) {
+ printk(KERN_DEBUG "%s: too short (%zd) authentication frame "
+ "received from " MAC_FMT " - ignored\n",
+ dev->name, len, MAC_ARG(mgmt->sa));
+ return;
+ }
+
+ if (sdata->type != IEEE80211_IF_TYPE_IBSS &&
+ memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) {
+ printk(KERN_DEBUG "%s: authentication frame received from "
+ "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - "
+ "ignored\n", dev->name, MAC_ARG(mgmt->sa),
+ MAC_ARG(mgmt->bssid));
+ return;
+ }
+
+ if (sdata->type != IEEE80211_IF_TYPE_IBSS &&
+ memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) {
+ printk(KERN_DEBUG "%s: authentication frame received from "
+ "unknown BSSID (SA=" MAC_FMT " BSSID=" MAC_FMT ") - "
+ "ignored\n", dev->name, MAC_ARG(mgmt->sa),
+ MAC_ARG(mgmt->bssid));
+ return;
+ }
+
+ auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
+ auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
+ status_code = le16_to_cpu(mgmt->u.auth.status_code);
+
+ printk(KERN_DEBUG "%s: RX authentication from " MAC_FMT " (alg=%d "
+ "transaction=%d status=%d)\n",
+ dev->name, MAC_ARG(mgmt->sa), auth_alg,
+ auth_transaction, status_code);
+
+ if (sdata->type == IEEE80211_IF_TYPE_IBSS) {
+ /* IEEE 802.11 standard does not require authentication in IBSS
+ * networks and most implementations do not seem to use it.
+ * However, try to reply to authentication attempts if someone
+ * has actually implemented this.
+ * TODO: Could implement shared key authentication. */
+ if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) {
+ printk(KERN_DEBUG "%s: unexpected IBSS authentication "
+ "frame (alg=%d transaction=%d)\n",
+ dev->name, auth_alg, auth_transaction);
+ return;
+ }
+ /* note: execution continues with the checks below after replying */
+ ieee80211_send_auth(dev, ifsta, 2, NULL, 0, 0);
+ }
+
+ if (auth_alg != ifsta->auth_alg ||
+ auth_transaction != ifsta->auth_transaction) {
+ printk(KERN_DEBUG "%s: unexpected authentication frame "
+ "(alg=%d transaction=%d)\n",
+ dev->name, auth_alg, auth_transaction);
+ return;
+ }
+
+ if (status_code != WLAN_STATUS_SUCCESS) {
+ printk(KERN_DEBUG "%s: AP denied authentication (auth_alg=%d "
+ "code=%d)\n", dev->name, ifsta->auth_alg, status_code);
+ if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG) {
+ /* candidate algorithms by slot: open, shared key, LEAP;
+ * 0xff marks a slot not enabled in ifsta->auth_algs */
+ u8 algs[3];
+ const int num_algs = ARRAY_SIZE(algs);
+ int i, pos;
+ algs[0] = algs[1] = algs[2] = 0xff;
+ if (ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN)
+ algs[0] = WLAN_AUTH_OPEN;
+ if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY)
+ algs[1] = WLAN_AUTH_SHARED_KEY;
+ if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP)
+ algs[2] = WLAN_AUTH_LEAP;
+ /* start from the slot of the algorithm that was just rejected */
+ if (ifsta->auth_alg == WLAN_AUTH_OPEN)
+ pos = 0;
+ else if (ifsta->auth_alg == WLAN_AUTH_SHARED_KEY)
+ pos = 1;
+ else
+ pos = 2;
+ /* walk the slots cyclically and take the first usable one */
+ for (i = 0; i < num_algs; i++) {
+ pos++;
+ if (pos >= num_algs)
+ pos = 0;
+ if (algs[pos] == ifsta->auth_alg ||
+ algs[pos] == 0xff)
+ continue;
+ if (algs[pos] == WLAN_AUTH_SHARED_KEY &&
+ !ieee80211_sta_wep_configured(dev))
+ continue;
+ ifsta->auth_alg = algs[pos];
+ printk(KERN_DEBUG "%s: set auth_alg=%d for "
+ "next try\n",
+ dev->name, ifsta->auth_alg);
+ break;
+ }
+ }
+ return;
+ }
+
+ switch (ifsta->auth_alg) {
+ case WLAN_AUTH_OPEN:
+ case WLAN_AUTH_LEAP:
+ ieee80211_auth_completed(dev, ifsta);
+ break;
+ case WLAN_AUTH_SHARED_KEY:
+ /* expected transaction 4 means the challenge reply was accepted */
+ if (ifsta->auth_transaction == 4)
+ ieee80211_auth_completed(dev, ifsta);
+ else
+ ieee80211_auth_challenge(dev, ifsta, mgmt, len);
+ break;
+ }
+}
+
+
+/*
+ * Handle a received deauthentication frame.
+ *
+ * Frames that are too short or whose SA differs from ifsta->bssid are
+ * ignored.  Otherwise, if the interface was authenticating, associating or
+ * associated, it goes back to IEEE80211_AUTHENTICATE with the timer armed
+ * for IEEE80211_RETRY_AUTH_INTERVAL.  In every accepted case the retry
+ * counters are reset, the association is dropped and the authenticated flag
+ * is cleared.
+ */
+static void ieee80211_rx_mgmt_deauth(struct net_device *dev,
+				     struct ieee80211_if_sta *ifsta,
+				     struct ieee80211_mgmt *mgmt,
+				     size_t len)
+{
+	u16 reason;
+
+	/* 24-byte header plus the two-byte reason code */
+	if (len < 24 + 2) {
+		printk(KERN_DEBUG "%s: too short (%zd) deauthentication frame "
+		       "received from " MAC_FMT " - ignored\n",
+		       dev->name, len, MAC_ARG(mgmt->sa));
+		return;
+	}
+
+	if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) {
+		printk(KERN_DEBUG "%s: deauthentication frame received from "
+		       "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - "
+		       "ignored\n", dev->name, MAC_ARG(mgmt->sa),
+		       MAC_ARG(mgmt->bssid));
+		return;
+	}
+
+	reason = le16_to_cpu(mgmt->u.deauth.reason_code);
+	printk(KERN_DEBUG "%s: RX deauthentication from " MAC_FMT
+	       " (reason=%d)\n",
+	       dev->name, MAC_ARG(mgmt->sa), reason);
+
+	if (ifsta->authenticated)
+		printk(KERN_DEBUG "%s: deauthenticated\n", dev->name);
+
+	switch (ifsta->state) {
+	case IEEE80211_AUTHENTICATE:
+	case IEEE80211_ASSOCIATE:
+	case IEEE80211_ASSOCIATED:
+		ifsta->state = IEEE80211_AUTHENTICATE;
+		mod_timer(&ifsta->timer,
+			  jiffies + IEEE80211_RETRY_AUTH_INTERVAL);
+		break;
+	default:
+		break;
+	}
+
+	ieee80211_set_disassoc(dev, ifsta, 1);
+	ifsta->authenticated = 0;
+}
+
+
+/*
+ * Handle a received disassociation frame.
+ *
+ * Frames that are too short or whose SA differs from ifsta->bssid are
+ * ignored.  Otherwise, if the interface was associated, it goes back to
+ * IEEE80211_ASSOCIATE with the timer armed for
+ * IEEE80211_RETRY_AUTH_INTERVAL.  In every accepted case the association
+ * retry counter is reset and the association is dropped.
+ */
+static void ieee80211_rx_mgmt_disassoc(struct net_device *dev,
+				       struct ieee80211_if_sta *ifsta,
+				       struct ieee80211_mgmt *mgmt,
+				       size_t len)
+{
+	u16 reason;
+
+	/* 24-byte header plus the two-byte reason code */
+	if (len < 24 + 2) {
+		printk(KERN_DEBUG "%s: too short (%zd) disassociation frame "
+		       "received from " MAC_FMT " - ignored\n",
+		       dev->name, len, MAC_ARG(mgmt->sa));
+		return;
+	}
+
+	if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) {
+		printk(KERN_DEBUG "%s: disassociation frame received from "
+		       "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - "
+		       "ignored\n", dev->name, MAC_ARG(mgmt->sa),
+		       MAC_ARG(mgmt->bssid));
+		return;
+	}
+
+	reason = le16_to_cpu(mgmt->u.disassoc.reason_code);
+	printk(KERN_DEBUG "%s: RX disassociation from " MAC_FMT
+	       " (reason=%d)\n",
+	       dev->name, MAC_ARG(mgmt->sa), reason);
+
+	if (ifsta->associated)
+		printk(KERN_DEBUG "%s: disassociated\n", dev->name);
+
+	if (ifsta->state == IEEE80211_ASSOCIATED) {
+		ifsta->state = IEEE80211_ASSOCIATE;
+		mod_timer(&ifsta->timer,
+			  jiffies + IEEE80211_RETRY_AUTH_INTERVAL);
+	}
+
+	ieee80211_set_disassoc(dev, ifsta, 0);
+}
+
+
+/*
+ * Handle a received association or reassociation response; 'reassoc' only
+ * affects the log message.
+ *
+ * The frame is ignored unless the state is IEEE80211_ASSOCIATE, it is at
+ * least 24 + 6 bytes long and its SA matches ifsta->bssid.  A non-success
+ * status, unparsable elements or a missing supported rates element end
+ * processing.  Otherwise the AID and AP capabilities are stored, a copy of
+ * the response IEs is kept in ifsta->assocresp_ies, the interface is marked
+ * associated, a STA entry for the AP is looked up or created and given its
+ * supported-rate bitmap, rate control is initialised, WMM parameters are
+ * applied when present and enabled, and ieee80211_associated() is run.
+ */
+static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta,
+ struct ieee80211_mgmt *mgmt,
+ size_t len,
+ int reassoc)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_hw_mode *mode;
+ struct sta_info *sta;
+ u32 rates;
+ u16 capab_info, status_code, aid;
+ struct ieee802_11_elems elems;
+ u8 *pos;
+ int i, j;
+
+ /* AssocResp and ReassocResp have identical structure, so process both
+ * of them in this function. */
+
+ if (ifsta->state != IEEE80211_ASSOCIATE) {
+ printk(KERN_DEBUG "%s: association frame received from "
+ MAC_FMT ", but not in associate state - ignored\n",
+ dev->name, MAC_ARG(mgmt->sa));
+ return;
+ }
+
+ if (len < 24 + 6) {
+ printk(KERN_DEBUG "%s: too short (%zd) association frame "
+ "received from " MAC_FMT " - ignored\n",
+ dev->name, len, MAC_ARG(mgmt->sa));
+ return;
+ }
+
+ if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) {
+ printk(KERN_DEBUG "%s: association frame received from "
+ "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - "
+ "ignored\n", dev->name, MAC_ARG(mgmt->sa),
+ MAC_ARG(mgmt->bssid));
+ return;
+ }
+
+ capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
+ status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
+ aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
+ /* bits 15:14 of the AID field are expected to be set; only warn if not,
+ * then strip them to obtain the AID value */
+ if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14)))
+ printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not "
+ "set\n", dev->name, aid);
+ aid &= ~(BIT(15) | BIT(14));
+
+ printk(KERN_DEBUG "%s: RX %sssocResp from " MAC_FMT " (capab=0x%x "
+ "status=%d aid=%d)\n",
+ dev->name, reassoc ? "Rea" : "A", MAC_ARG(mgmt->sa),
+ capab_info, status_code, aid);
+
+ if (status_code != WLAN_STATUS_SUCCESS) {
+ printk(KERN_DEBUG "%s: AP denied association (code=%d)\n",
+ dev->name, status_code);
+ return;
+ }
+
+ pos = mgmt->u.assoc_resp.variable;
+ if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems)
+ == ParseFailed) {
+ printk(KERN_DEBUG "%s: failed to parse AssocResp\n",
+ dev->name);
+ return;
+ }
+
+ if (!elems.supp_rates) {
+ printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n",
+ dev->name);
+ return;
+ }
+
+ printk(KERN_DEBUG "%s: associated\n", dev->name);
+ ifsta->aid = aid;
+ ifsta->ap_capab = capab_info;
+
+ /* keep a copy of the response IEs; on allocation failure the pointer is
+ * NULL while assocresp_ies_len keeps the computed length */
+ kfree(ifsta->assocresp_ies);
+ ifsta->assocresp_ies_len = len - (pos - (u8 *) mgmt);
+ ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_ATOMIC);
+ if (ifsta->assocresp_ies)
+ memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len);
+
+ ieee80211_set_associated(dev, ifsta, 1);
+
+ /* Add STA entry for the AP */
+ sta = sta_info_get(local, ifsta->bssid);
+ if (!sta) {
+ struct ieee80211_sta_bss *bss;
+ sta = sta_info_add(local, dev, ifsta->bssid, GFP_ATOMIC);
+ if (!sta) {
+ printk(KERN_DEBUG "%s: failed to add STA entry for the"
+ " AP\n", dev->name);
+ return;
+ }
+ /* seed the new entry's signal statistics from the stored BSS entry */
+ bss = ieee80211_rx_bss_get(dev, ifsta->bssid);
+ if (bss) {
+ sta->last_rssi = bss->rssi;
+ sta->last_signal = bss->signal;
+ sta->last_noise = bss->noise;
+ ieee80211_rx_bss_put(dev, bss);
+ }
+ }
+
+ sta->dev = dev;
+ sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC;
+ sta->assoc_ap = 1;
+
+ /* build a bitmap with bit j set when the AP advertises mode->rates[j];
+ * bit 7 of each advertised rate byte is masked off before comparing */
+ rates = 0;
+ mode = local->oper_hw_mode;
+ for (i = 0; i < elems.supp_rates_len; i++) {
+ int rate = (elems.supp_rates[i] & 0x7f) * 5;
+ if (mode->mode == MODE_ATHEROS_TURBO)
+ rate *= 2;
+ for (j = 0; j < mode->num_rates; j++)
+ if (mode->rates[j].rate == rate)
+ rates |= BIT(j);
+ }
+ /* ext_supp_rates_len is 0 when the element was absent */
+ for (i = 0; i < elems.ext_supp_rates_len; i++) {
+ int rate = (elems.ext_supp_rates[i] & 0x7f) * 5;
+ if (mode->mode == MODE_ATHEROS_TURBO)
+ rate *= 2;
+ for (j = 0; j < mode->num_rates; j++)
+ if (mode->rates[j].rate == rate)
+ rates |= BIT(j);
+ }
+ sta->supp_rates = rates;
+
+ rate_control_rate_init(sta, local);
+
+ if (elems.wmm_param && ifsta->wmm_enabled) {
+ sta->flags |= WLAN_STA_WME;
+ ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
+ elems.wmm_param_len);
+ }
+
+
+ sta_info_put(sta);
+
+ ieee80211_associated(dev, ifsta);
+}
+
+
+/*
+ * Insert bss at the head of the hash chain selected by its BSSID.
+ * Caller must hold local->sta_bss_lock.
+ */
+static void __ieee80211_rx_bss_hash_add(struct net_device *dev,
+					struct ieee80211_sta_bss *bss)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sta_bss **bucket;
+
+	bucket = &local->sta_bss_hash[STA_HASH(bss->bssid)];
+	bss->hnext = *bucket;
+	*bucket = bss;
+}
+
+
+/*
+ * Unlink bss from the hash chain selected by its BSSID. Nothing happens if
+ * the entry is not on that chain.
+ * Caller must hold local->sta_bss_lock.
+ */
+static void __ieee80211_rx_bss_hash_del(struct net_device *dev,
+					struct ieee80211_sta_bss *bss)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sta_bss **link;
+
+	/* Walk the links instead of the nodes so that removing the chain
+	 * head and removing an interior entry are the same store. */
+	for (link = &local->sta_bss_hash[STA_HASH(bss->bssid)]; *link;
+	     link = &(*link)->hnext) {
+		if (*link == bss) {
+			*link = bss->hnext;
+			break;
+		}
+	}
+}
+
+
+/*
+ * Allocate a zero-initialized BSS entry for bssid, append it to
+ * local->sta_bss_list and add it to the BSSID hash.
+ *
+ * The entry is returned with a user count of two; the callers in this file
+ * drop one of them with ieee80211_rx_bss_put() when they are done, and
+ * ieee80211_rx_bss_list_deinit() drops one per list entry.
+ *
+ * Returns the new entry, or NULL if the allocation fails.
+ */
+static struct ieee80211_sta_bss *
+ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sta_bss *bss;
+
+	/* GFP_ATOMIC: may be called from contexts that cannot sleep. */
+	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
+	if (!bss)
+		return NULL;
+	atomic_set(&bss->users, 2);
+	memcpy(bss->bssid, bssid, ETH_ALEN);
+
+	spin_lock_bh(&local->sta_bss_lock);
+	/* TODO: order by RSSI? */
+	list_add_tail(&bss->list, &local->sta_bss_list);
+	__ieee80211_rx_bss_hash_add(dev, bss);
+	spin_unlock_bh(&local->sta_bss_lock);
+	return bss;
+}
+
+
+/*
+ * Look up the BSS entry for bssid in the hash table.
+ *
+ * On a hit the entry's user count is incremented before the lock is dropped;
+ * the caller releases it with ieee80211_rx_bss_put(). Returns NULL when no
+ * entry matches.
+ */
+static struct ieee80211_sta_bss *
+ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sta_bss *bss;
+
+	spin_lock_bh(&local->sta_bss_lock);
+	for (bss = local->sta_bss_hash[STA_HASH(bssid)]; bss;
+	     bss = bss->hnext) {
+		if (memcmp(bss->bssid, bssid, ETH_ALEN) != 0)
+			continue;
+		atomic_inc(&bss->users);
+		break;
+	}
+	spin_unlock_bh(&local->sta_bss_lock);
+
+	return bss;
+}
+
+/* Free a BSS entry together with its stored WPA, RSN and WMM IE buffers. */
+static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
+{
+ kfree(bss->wpa_ie);
+ kfree(bss->rsn_ie);
+ kfree(bss->wmm_ie);
+ kfree(bss);
+}
+
+
+/*
+ * Drop one user of bss. When the count reaches zero the entry is removed
+ * from the hash and from the BSS list under sta_bss_lock and then freed.
+ */
+static void ieee80211_rx_bss_put(struct net_device *dev,
+				 struct ieee80211_sta_bss *bss)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	if (atomic_dec_and_test(&bss->users)) {
+		spin_lock_bh(&local->sta_bss_lock);
+		__ieee80211_rx_bss_hash_del(dev, bss);
+		list_del(&bss->list);
+		spin_unlock_bh(&local->sta_bss_lock);
+
+		/* The entry is unreachable now; free it outside the lock. */
+		ieee80211_rx_bss_free(bss);
+	}
+}
+
+/* Initialize the lock and the list head of the BSS table kept in ieee80211_local. */
+void ieee80211_rx_bss_list_init(struct net_device *dev)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ spin_lock_init(&local->sta_bss_lock);
+ INIT_LIST_HEAD(&local->sta_bss_list);
+}
+
+/*
+ * Drop one user of every entry on local->sta_bss_list.
+ * ieee80211_rx_bss_put() unlinks and frees an entry only when that was its
+ * last user, hence the _safe iterator.
+ * NOTE(review): an entry that still has other users stays on the list and is
+ * not freed here - confirm that cannot happen at teardown.
+ */
+void ieee80211_rx_bss_list_deinit(struct net_device *dev)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sta_bss *bss, *tmp;
+
+ list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list)
+ ieee80211_rx_bss_put(dev, bss);
+}
+
+
+/*
+ * Process the BSS information carried by a Beacon or Probe Response.
+ *
+ * dev:       interface the frame was received on
+ * mgmt, len: the management frame and its total length in bytes
+ * rx_status: receive status (channel, freq, phymode, ssi/signal/noise)
+ * beacon:    non-zero for a Beacon, zero for a Probe Response
+ *
+ * Probe Responses addressed to another station are ignored. On an IBSS
+ * interface a frame from our own BSSID narrows the sender's supported-rate
+ * set to the rates it advertises. The BSS table entry for mgmt->bssid is
+ * then created or refreshed; once an entry holds Probe Response data,
+ * Beacons no longer overwrite it.
+ */
+static void ieee80211_rx_bss_info(struct net_device *dev,
+				  struct ieee80211_mgmt *mgmt,
+				  size_t len,
+				  struct ieee80211_rx_status *rx_status,
+				  int beacon)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee802_11_elems elems;
+	size_t baselen;
+	int channel, invalid = 0, clen;
+	struct ieee80211_sta_bss *bss;
+	struct sta_info *sta;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	u64 timestamp;
+
+	if (!beacon && memcmp(mgmt->da, dev->dev_addr, ETH_ALEN))
+		return; /* ignore ProbeResp to foreign address */
+
+#if 0
+	printk(KERN_DEBUG "%s: RX %s from " MAC_FMT " to " MAC_FMT "\n",
+	       dev->name, beacon ? "Beacon" : "Probe Response",
+	       MAC_ARG(mgmt->sa), MAC_ARG(mgmt->da));
+#endif
+
+	/* The fixed beacon fields must be present before they are read. */
+	baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
+	if (baselen > len)
+		return;
+
+	timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
+
+	if (sdata->type == IEEE80211_IF_TYPE_IBSS && beacon &&
+	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+		static unsigned long last_tsf_debug = 0;
+		u64 tsf;
+		if (local->ops->get_tsf)
+			tsf = local->ops->get_tsf(local_to_hw(local));
+		else
+			tsf = -1LLU;
+		if (time_after(jiffies, last_tsf_debug + 5 * HZ)) {
+			printk(KERN_DEBUG "RX beacon SA=" MAC_FMT " BSSID="
+			       MAC_FMT " TSF=0x%llx BCN=0x%llx diff=%lld "
+			       "@%lu\n",
+			       MAC_ARG(mgmt->sa), MAC_ARG(mgmt->bssid),
+			       (unsigned long long)tsf,
+			       (unsigned long long)timestamp,
+			       (unsigned long long)(tsf - timestamp),
+			       jiffies);
+			last_tsf_debug = jiffies;
+		}
+#endif /* CONFIG_MAC80211_IBSS_DEBUG */
+	}
+
+	/* A parse failure is only recorded; whatever was parsed is used. */
+	if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen,
+				   &elems) == ParseFailed)
+		invalid = 1;
+
+	if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
+	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
+	    (sta = sta_info_get(local, mgmt->sa))) {
+		struct ieee80211_hw_mode *mode;
+		struct ieee80211_rate *rates;
+		size_t num_rates;
+		u32 supp_rates, prev_rates;
+		int i, j;
+
+		mode = local->sta_scanning ?
+			local->scan_hw_mode : local->oper_hw_mode;
+		rates = mode->rates;
+		num_rates = mode->num_rates;
+
+		/* Build a bitmap of our rate table entries that the peer
+		 * lists in its (extended) supported rates elements. */
+		supp_rates = 0;
+		for (i = 0; i < elems.supp_rates_len +
+			     elems.ext_supp_rates_len; i++) {
+			u8 rate = 0;
+			int own_rate;
+			if (i < elems.supp_rates_len)
+				rate = elems.supp_rates[i];
+			else if (elems.ext_supp_rates)
+				rate = elems.ext_supp_rates
+					[i - elems.supp_rates_len];
+			own_rate = 5 * (rate & 0x7f);
+			if (mode->mode == MODE_ATHEROS_TURBO)
+				own_rate *= 2;
+			for (j = 0; j < num_rates; j++)
+				if (rates[j].rate == own_rate)
+					supp_rates |= BIT(j);
+		}
+
+		prev_rates = sta->supp_rates;
+		sta->supp_rates &= supp_rates;
+		if (sta->supp_rates == 0) {
+			/* No matching rates - this should not really happen.
+			 * Make sure that at least one rate is marked
+			 * supported to avoid issues with TX rate ctrl. */
+			sta->supp_rates = sdata->u.sta.supp_rates_bits;
+		}
+		if (sta->supp_rates != prev_rates) {
+			printk(KERN_DEBUG "%s: updated supp_rates set for "
+			       MAC_FMT " based on beacon info (0x%x & 0x%x -> "
+			       "0x%x)\n",
+			       dev->name, MAC_ARG(sta->addr), prev_rates,
+			       supp_rates, sta->supp_rates);
+		}
+		sta_info_put(sta);
+	}
+
+	if (!elems.ssid)
+		return;
+
+	/* Prefer the channel the sender announces over the one we were
+	 * tuned to when the frame arrived. */
+	if (elems.ds_params && elems.ds_params_len == 1)
+		channel = elems.ds_params[0];
+	else
+		channel = rx_status->channel;
+
+	bss = ieee80211_rx_bss_get(dev, mgmt->bssid);
+	if (!bss) {
+		bss = ieee80211_rx_bss_add(dev, mgmt->bssid);
+		if (!bss)
+			return;
+	} else {
+#if 0
+		/* TODO: order by RSSI? */
+		spin_lock_bh(&local->sta_bss_lock);
+		list_move_tail(&bss->list, &local->sta_bss_list);
+		spin_unlock_bh(&local->sta_bss_lock);
+#endif
+	}
+
+	if (bss->probe_resp && beacon) {
+		/* Do not allow beacon to override data from Probe Response. */
+		ieee80211_rx_bss_put(dev, bss);
+		return;
+	}
+
+	bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
+	bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
+	if (elems.ssid && elems.ssid_len <= IEEE80211_MAX_SSID_LEN) {
+		memcpy(bss->ssid, elems.ssid, elems.ssid_len);
+		bss->ssid_len = elems.ssid_len;
+	}
+
+	/* Concatenate supported and extended supported rates, truncated to
+	 * IEEE80211_MAX_SUPP_RATES entries. */
+	bss->supp_rates_len = 0;
+	if (elems.supp_rates) {
+		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
+		if (clen > elems.supp_rates_len)
+			clen = elems.supp_rates_len;
+		memcpy(&bss->supp_rates[bss->supp_rates_len], elems.supp_rates,
+		       clen);
+		bss->supp_rates_len += clen;
+	}
+	if (elems.ext_supp_rates) {
+		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
+		if (clen > elems.ext_supp_rates_len)
+			clen = elems.ext_supp_rates_len;
+		memcpy(&bss->supp_rates[bss->supp_rates_len],
+		       elems.ext_supp_rates, clen);
+		bss->supp_rates_len += clen;
+	}
+
+	/*
+	 * The stored WPA/RSN/WMM IEs include the 2-byte element header
+	 * (ID + length) that precedes the payload the elems pointers refer
+	 * to. The "has it changed?" tests below therefore compare the
+	 * header-inclusive length and data, so an unchanged IE is kept
+	 * instead of being freed and reallocated on every frame.
+	 */
+	if (elems.wpa &&
+	    (!bss->wpa_ie || bss->wpa_ie_len != elems.wpa_len + 2 ||
+	     memcmp(bss->wpa_ie, elems.wpa - 2, elems.wpa_len + 2))) {
+		kfree(bss->wpa_ie);
+		bss->wpa_ie = kmalloc(elems.wpa_len + 2, GFP_ATOMIC);
+		if (bss->wpa_ie) {
+			memcpy(bss->wpa_ie, elems.wpa - 2, elems.wpa_len + 2);
+			bss->wpa_ie_len = elems.wpa_len + 2;
+		} else
+			bss->wpa_ie_len = 0;
+	} else if (!elems.wpa && bss->wpa_ie) {
+		kfree(bss->wpa_ie);
+		bss->wpa_ie = NULL;
+		bss->wpa_ie_len = 0;
+	}
+
+	if (elems.rsn &&
+	    (!bss->rsn_ie || bss->rsn_ie_len != elems.rsn_len + 2 ||
+	     memcmp(bss->rsn_ie, elems.rsn - 2, elems.rsn_len + 2))) {
+		kfree(bss->rsn_ie);
+		bss->rsn_ie = kmalloc(elems.rsn_len + 2, GFP_ATOMIC);
+		if (bss->rsn_ie) {
+			memcpy(bss->rsn_ie, elems.rsn - 2, elems.rsn_len + 2);
+			bss->rsn_ie_len = elems.rsn_len + 2;
+		} else
+			bss->rsn_ie_len = 0;
+	} else if (!elems.rsn && bss->rsn_ie) {
+		kfree(bss->rsn_ie);
+		bss->rsn_ie = NULL;
+		bss->rsn_ie_len = 0;
+	}
+
+	if (elems.wmm_param &&
+	    (!bss->wmm_ie || bss->wmm_ie_len != elems.wmm_param_len + 2 ||
+	     memcmp(bss->wmm_ie, elems.wmm_param - 2,
+		    elems.wmm_param_len + 2))) {
+		kfree(bss->wmm_ie);
+		bss->wmm_ie = kmalloc(elems.wmm_param_len + 2, GFP_ATOMIC);
+		if (bss->wmm_ie) {
+			memcpy(bss->wmm_ie, elems.wmm_param - 2,
+			       elems.wmm_param_len + 2);
+			bss->wmm_ie_len = elems.wmm_param_len + 2;
+		} else
+			bss->wmm_ie_len = 0;
+	} else if (!elems.wmm_param && bss->wmm_ie) {
+		kfree(bss->wmm_ie);
+		bss->wmm_ie = NULL;
+		bss->wmm_ie_len = 0;
+	}
+
+
+	bss->hw_mode = rx_status->phymode;
+	bss->channel = channel;
+	bss->freq = rx_status->freq;
+	if (channel != rx_status->channel &&
+	    (bss->hw_mode == MODE_IEEE80211G ||
+	     bss->hw_mode == MODE_IEEE80211B) &&
+	    channel >= 1 && channel <= 14) {
+		static const int freq_list[] = {
+			2412, 2417, 2422, 2427, 2432, 2437, 2442,
+			2447, 2452, 2457, 2462, 2467, 2472, 2484
+		};
+		/* IEEE 802.11g/b mode can receive packets from neighboring
+		 * channels, so map the channel into frequency. */
+		bss->freq = freq_list[channel - 1];
+	}
+	bss->timestamp = timestamp;
+	bss->last_update = jiffies;
+	bss->rssi = rx_status->ssi;
+	bss->signal = rx_status->signal;
+	bss->noise = rx_status->noise;
+	if (!beacon)
+		bss->probe_resp++;
+	ieee80211_rx_bss_put(dev, bss);
+}
+
+/* Handle a Probe Response: update the BSS table with beacon == 0. */
+static void ieee80211_rx_mgmt_probe_resp(struct net_device *dev,
+ struct ieee80211_mgmt *mgmt,
+ size_t len,
+ struct ieee80211_rx_status *rx_status)
+{
+ ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 0);
+}
+
+
+/*
+ * Handle a received Beacon.
+ *
+ * The frame always updates the BSS table. If this is a STA interface that is
+ * associated with the BSS the beacon came from, the ERP "use protection" bit
+ * and the WMM parameters carried by the beacon are applied as well.
+ */
+static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
+				     struct ieee80211_mgmt *mgmt,
+				     size_t len,
+				     struct ieee80211_rx_status *rx_status)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_sta *ifsta;
+	struct ieee802_11_elems elems;
+	size_t hdrlen;
+	int protect = 0;
+
+	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1);
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->type != IEEE80211_IF_TYPE_STA)
+		return;
+
+	ifsta = &sdata->u.sta;
+	if (!ifsta->associated)
+		return;
+	if (memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0)
+		return;
+
+	/* Process beacon from the current BSS */
+	hdrlen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
+	if (hdrlen > len)
+		return;
+
+	if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - hdrlen,
+				   &elems) == ParseFailed)
+		return;
+
+	if (elems.erp_info && elems.erp_info_len >= 1 &&
+	    (elems.erp_info[0] & ERP_INFO_USE_PROTECTION) != 0)
+		protect = 1;
+
+	/* Only touch the configuration when the AP's setting changed. */
+	if (protect != !!ifsta->use_protection) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG "%s: CTS protection %s (BSSID="
+			       MAC_FMT ")\n",
+			       dev->name,
+			       protect ? "enabled" : "disabled",
+			       MAC_ARG(ifsta->bssid));
+		ifsta->use_protection = protect ? 1 : 0;
+		local->cts_protect_erp_frames = protect;
+	}
+
+	if (elems.wmm_param && ifsta->wmm_enabled)
+		ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
+					 elems.wmm_param_len);
+}
+
+/*
+ * Answer a Probe Request while joined to an IBSS.
+ *
+ * A reply is sent only if the interface is of IBSS type, is in the
+ * IBSS_JOINED state and has a Probe Response template, the driver's
+ * tx_last_beacon callback (when provided) returns non-zero, the request's
+ * BSSID is ours or broadcast, and its SSID element is empty or equals our
+ * SSID. The reply is a copy of ifsta->probe_resp with the destination
+ * address set to the requester.
+ */
+static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta,
+ struct ieee80211_mgmt *mgmt,
+ size_t len,
+ struct ieee80211_rx_status *rx_status)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ int tx_last_beacon;
+ struct sk_buff *skb;
+ struct ieee80211_mgmt *resp;
+ u8 *pos, *end;
+
+ if (sdata->type != IEEE80211_IF_TYPE_IBSS ||
+ ifsta->state != IEEE80211_IBSS_JOINED ||
+ len < 24 + 2 || !ifsta->probe_resp) /* 24 + 2: header plus the ID and
+ * length bytes of the first element,
+ * which are read below */
+ return;
+
+ if (local->ops->tx_last_beacon)
+ tx_last_beacon = local->ops->tx_last_beacon(local_to_hw(local));
+ else
+ tx_last_beacon = 1; /* no callback: always answer */
+
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+ printk(KERN_DEBUG "%s: RX ProbeReq SA=" MAC_FMT " DA=" MAC_FMT " BSSID="
+ MAC_FMT " (tx_last_beacon=%d)\n",
+ dev->name, MAC_ARG(mgmt->sa), MAC_ARG(mgmt->da),
+ MAC_ARG(mgmt->bssid), tx_last_beacon);
+#endif /* CONFIG_MAC80211_IBSS_DEBUG */
+
+ if (!tx_last_beacon)
+ return;
+
+ if (memcmp(mgmt->bssid, ifsta->bssid, ETH_ALEN) != 0 &&
+ memcmp(mgmt->bssid, "\xff\xff\xff\xff\xff\xff", ETH_ALEN) != 0)
+ return;
+
+ end = ((u8 *) mgmt) + len;
+ pos = mgmt->u.probe_req.variable;
+ if (pos[0] != WLAN_EID_SSID ||
+ pos + 2 + pos[1] > end) { /* SSID element must fit in the frame */
+ if (net_ratelimit()) {
+ printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq "
+ "from " MAC_FMT "\n",
+ dev->name, MAC_ARG(mgmt->sa));
+ }
+ return;
+ }
+ if (pos[1] != 0 &&
+ (pos[1] != ifsta->ssid_len ||
+ memcmp(pos + 2, ifsta->ssid, ifsta->ssid_len) != 0)) {
+ /* Ignore ProbeReq for foreign SSID */
+ return;
+ }
+
+ /* Reply with ProbeResp */
+ skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ resp = (struct ieee80211_mgmt *) skb->data;
+ memcpy(resp->da, mgmt->sa, ETH_ALEN);
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+ printk(KERN_DEBUG "%s: Sending ProbeResp to " MAC_FMT "\n",
+ dev->name, MAC_ARG(resp->da));
+#endif /* CONFIG_MAC80211_IBSS_DEBUG */
+ ieee80211_sta_tx(dev, skb, 0);
+}
+
+/*
+ * Receive-path entry point for management frames on a STA/IBSS interface.
+ *
+ * Frames of a handled subtype are appended to ifsta->skb_queue and
+ * ifsta->work is queued to process them later; the skb is then owned by the
+ * queue. Frames shorter than 24 bytes or of any other subtype are freed
+ * here. For Probe Request, Probe Response and Beacon frames the receive
+ * status is copied into skb->cb first.
+ */
+void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
+ struct ieee80211_rx_status *rx_status)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_if_sta *ifsta;
+ struct ieee80211_mgmt *mgmt;
+ u16 fc;
+
+ if (skb->len < 24)
+ goto fail;
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ ifsta = &sdata->u.sta;
+
+ mgmt = (struct ieee80211_mgmt *) skb->data;
+ fc = le16_to_cpu(mgmt->frame_control);
+
+ switch (fc & IEEE80211_FCTL_STYPE) {
+ case IEEE80211_STYPE_PROBE_REQ:
+ case IEEE80211_STYPE_PROBE_RESP:
+ case IEEE80211_STYPE_BEACON:
+ memcpy(skb->cb, rx_status, sizeof(*rx_status)); /* kept for the
+ * queued handler; intentional fall through to queue the frame */
+ case IEEE80211_STYPE_AUTH:
+ case IEEE80211_STYPE_ASSOC_RESP:
+ case IEEE80211_STYPE_REASSOC_RESP:
+ case IEEE80211_STYPE_DEAUTH:
+ case IEEE80211_STYPE_DISASSOC:
+ skb_queue_tail(&ifsta->skb_queue, skb);
+ queue_work(local->hw.workqueue, &ifsta->work);
+ return;
+ default:
+ printk(KERN_DEBUG "%s: received unknown management frame - "
+ "stype=%d\n", dev->name,
+ (fc & IEEE80211_FCTL_STYPE) >> 4);
+ break;
+ }
+
+ fail:
+ kfree_skb(skb);
+}
+
+/*
+ * Process one management frame taken from ifsta->skb_queue: dispatch it to
+ * the handler for its subtype and free the skb afterwards.
+ *
+ * rx_status is read back from skb->cb. ieee80211_sta_rx_mgmt() stores it
+ * there only for Probe Request, Probe Response and Beacon frames, which are
+ * also the only handlers it is passed to here.
+ */
+static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ struct ieee80211_rx_status *rx_status;
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_if_sta *ifsta;
+ struct ieee80211_mgmt *mgmt;
+ u16 fc;
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ ifsta = &sdata->u.sta;
+
+ rx_status = (struct ieee80211_rx_status *) skb->cb;
+ mgmt = (struct ieee80211_mgmt *) skb->data;
+ fc = le16_to_cpu(mgmt->frame_control);
+
+ switch (fc & IEEE80211_FCTL_STYPE) {
+ case IEEE80211_STYPE_PROBE_REQ:
+ ieee80211_rx_mgmt_probe_req(dev, ifsta, mgmt, skb->len,
+ rx_status);
+ break;
+ case IEEE80211_STYPE_PROBE_RESP:
+ ieee80211_rx_mgmt_probe_resp(dev, mgmt, skb->len, rx_status);
+ break;
+ case IEEE80211_STYPE_BEACON:
+ ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, rx_status);
+ break;
+ case IEEE80211_STYPE_AUTH:
+ ieee80211_rx_mgmt_auth(dev, ifsta, mgmt, skb->len);
+ break;
+ case IEEE80211_STYPE_ASSOC_RESP:
+ ieee80211_rx_mgmt_assoc_resp(dev, ifsta, mgmt, skb->len, 0); /* 0: not a reassociation */
+ break;
+ case IEEE80211_STYPE_REASSOC_RESP:
+ ieee80211_rx_mgmt_assoc_resp(dev, ifsta, mgmt, skb->len, 1); /* 1: reassociation */
+ break;
+ case IEEE80211_STYPE_DEAUTH:
+ ieee80211_rx_mgmt_deauth(dev, ifsta, mgmt, skb->len);
+ break;
+ case IEEE80211_STYPE_DISASSOC:
+ ieee80211_rx_mgmt_disassoc(dev, ifsta, mgmt, skb->len);
+ break;
+ }
+
+ kfree_skb(skb);
+}
+
+
+/*
+ * Handle a frame received while scanning. Probe Responses and Beacons are
+ * processed immediately; every frame, handled or not, is freed before
+ * returning.
+ */
+void ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
+			   struct ieee80211_rx_status *rx_status)
+{
+	struct ieee80211_mgmt *mgmt;
+	u16 fc;
+
+	/* Too short to carry a management header. */
+	if (skb->len < 24)
+		goto out;
+
+	mgmt = (struct ieee80211_mgmt *) skb->data;
+	fc = le16_to_cpu(mgmt->frame_control);
+
+	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT)
+		goto out;
+
+	switch (fc & IEEE80211_FCTL_STYPE) {
+	case IEEE80211_STYPE_PROBE_RESP:
+		ieee80211_rx_mgmt_probe_resp(dev, mgmt, skb->len, rx_status);
+		break;
+	case IEEE80211_STYPE_BEACON:
+		ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, rx_status);
+		break;
+	default:
+		break;
+	}
+
+ out:
+	dev_kfree_skb(skb);
+}
+
+
+/*
+ * Return 1 if at least one station entry belonging to dev has received a
+ * frame within the last IEEE80211_IBSS_MERGE_INTERVAL, 0 otherwise.
+ */
+static int ieee80211_sta_active_ibss(struct net_device *dev)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+	int found = 0;
+
+	spin_lock_bh(&local->sta_lock);
+	list_for_each_entry(sta, &local->sta_list, list) {
+		if (sta->dev != dev)
+			continue;
+		if (!time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL,
+				jiffies))
+			continue;
+		/* One recently heard peer is enough. */
+		found = 1;
+		break;
+	}
+	spin_unlock_bh(&local->sta_lock);
+
+	return found;
+}
+
+/*
+ * Free every station entry that has not received anything for more than
+ * IEEE80211_IBSS_INACTIVITY_LIMIT. The walk and the frees happen with
+ * local->sta_lock held, hence the _safe iterator.
+ * NOTE(review): the loop does not filter on sta->dev, so entries of every
+ * interface on local->sta_list are expired; dev is only used for the log
+ * message - confirm this is intended.
+ */
+static void ieee80211_sta_expire(struct net_device *dev)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct sta_info *sta, *tmp;
+
+ spin_lock_bh(&local->sta_lock);
+ list_for_each_entry_safe(sta, tmp, &local->sta_list, list)
+ if (time_after(jiffies, sta->last_rx +
+ IEEE80211_IBSS_INACTIVITY_LIMIT)) {
+ printk(KERN_DEBUG "%s: expiring inactive STA " MAC_FMT
+ "\n", dev->name, MAC_ARG(sta->addr));
+ sta_info_free(sta, 1);
+ }
+ spin_unlock_bh(&local->sta_lock);
+}
+
+/*
+ * Periodic work for the IBSS_JOINED state: re-arm the timer, expire
+ * inactive stations and, if no station of this interface has been heard
+ * recently, request a scan for other IBSS networks using our SSID.
+ */
+static void ieee80211_sta_merge_ibss(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta)
+{
+ mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
+
+ ieee80211_sta_expire(dev);
+ if (ieee80211_sta_active_ibss(dev))
+ return;
+
+ printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
+ "IBSS networks with same SSID (merge)\n", dev->name);
+ ieee80211_sta_req_scan(dev, ifsta->ssid, ifsta->ssid_len);
+}
+
+/*
+ * Timer callback; data is the struct ieee80211_sub_if_data pointer cast to
+ * unsigned long. Sets the REQ_RUN request bit and queues ifsta->work so the
+ * state machine runs from the workqueue.
+ */
+void ieee80211_sta_timer(unsigned long data)
+{
+ struct ieee80211_sub_if_data *sdata =
+ (struct ieee80211_sub_if_data *) data;
+ struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+ struct ieee80211_local *local = wdev_priv(&sdata->wdev);
+
+ set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request);
+ queue_work(local->hw.workqueue, &ifsta->work);
+}
+
+/*
+ * Work handler driving the STA/IBSS state machine for one interface.
+ *
+ * Returns without doing anything if the device is not running, a scan is in
+ * progress, or the interface is neither STA nor IBSS. Otherwise it:
+ *  1. processes all queued management frames;
+ *  2. starts a requested scan, unless authentication or association is in
+ *     progress;
+ *  3. on an authentication request selects the BSS via
+ *     ieee80211_sta_config_auth(); without one it continues only if the
+ *     REQ_RUN bit was set;
+ *  4. runs the handler for the current state;
+ *  5. disassociates if ieee80211_privacy_mismatch() reports a mismatch.
+ */
+void ieee80211_sta_work(struct work_struct *work)
+{
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data, u.sta.work);
+ struct net_device *dev = sdata->dev;
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_if_sta *ifsta;
+ struct sk_buff *skb;
+
+ if (!netif_running(dev))
+ return;
+
+ if (local->sta_scanning)
+ return;
+
+ if (sdata->type != IEEE80211_IF_TYPE_STA &&
+ sdata->type != IEEE80211_IF_TYPE_IBSS) {
+ printk(KERN_DEBUG "%s: ieee80211_sta_work: non-STA interface "
+ "(type=%d)\n", dev->name, sdata->type);
+ return;
+ }
+ ifsta = &sdata->u.sta;
+
+ while ((skb = skb_dequeue(&ifsta->skb_queue)))
+ ieee80211_sta_rx_queued_mgmt(dev, skb);
+
+ if (ifsta->state != IEEE80211_AUTHENTICATE &&
+ ifsta->state != IEEE80211_ASSOCIATE &&
+ test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
+ ieee80211_sta_start_scan(dev, NULL, 0);
+ return;
+ }
+
+ if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) {
+ if (ieee80211_sta_config_auth(dev, ifsta))
+ return;
+ clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); /* this pass
+ * already runs the state machine */
+ } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request))
+ return;
+
+ switch (ifsta->state) {
+ case IEEE80211_DISABLED:
+ break;
+ case IEEE80211_AUTHENTICATE:
+ ieee80211_authenticate(dev, ifsta);
+ break;
+ case IEEE80211_ASSOCIATE:
+ ieee80211_associate(dev, ifsta);
+ break;
+ case IEEE80211_ASSOCIATED:
+ ieee80211_associated(dev, ifsta);
+ break;
+ case IEEE80211_IBSS_SEARCH:
+ ieee80211_sta_find_ibss(dev, ifsta);
+ break;
+ case IEEE80211_IBSS_JOINED:
+ ieee80211_sta_merge_ibss(dev, ifsta);
+ break;
+ default:
+ printk(KERN_DEBUG "ieee80211_sta_work: Unknown state %d\n",
+ ifsta->state);
+ break;
+ }
+
+ if (ieee80211_privacy_mismatch(dev, ifsta)) {
+ printk(KERN_DEBUG "%s: privacy configuration mismatch and "
+ "mixed-cell disabled - disassociate\n", dev->name);
+
+ ieee80211_send_disassoc(dev, ifsta, WLAN_REASON_UNSPECIFIED);
+ ieee80211_set_disassoc(dev, ifsta, 0);
+ }
+}
+
+
+/*
+ * Reset the authentication/association bookkeeping before a new attempt:
+ * reset the TSF if the driver supports it, accept any WMM parameter set,
+ * choose the first authentication algorithm to try, clear the try counters
+ * and the associated flag, and turn the carrier off.
+ */
+static void ieee80211_sta_reset_auth(struct net_device *dev,
+				     struct ieee80211_if_sta *ifsta)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	int alg;
+
+	/* Reset own TSF to allow time synchronization work. */
+	if (local->ops->reset_tsf)
+		local->ops->reset_tsf(local_to_hw(local));
+
+	ifsta->wmm_last_param_set = -1; /* allow any WMM update */
+
+	/* Preference order: open, shared key, LEAP; open when none of the
+	 * three is enabled. */
+	alg = WLAN_AUTH_OPEN;
+	if (!(ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN)) {
+		if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY)
+			alg = WLAN_AUTH_SHARED_KEY;
+		else if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP)
+			alg = WLAN_AUTH_LEAP;
+	}
+	ifsta->auth_alg = alg;
+
+	printk(KERN_DEBUG "%s: Initial auth_alg=%d\n", dev->name,
+	       ifsta->auth_alg);
+	ifsta->auth_transaction = -1;
+	ifsta->associated = ifsta->auth_tries = ifsta->assoc_tries = 0;
+	netif_carrier_off(dev);
+}
+
+
+/*
+ * Request an authentication attempt on a STA interface. The request is
+ * raised only when both a BSSID and an SSID are either configured or left
+ * to automatic selection; it is carried out later by ifsta->work.
+ */
+void ieee80211_sta_req_auth(struct net_device *dev,
+			    struct ieee80211_if_sta *ifsta)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (sdata->type != IEEE80211_IF_TYPE_STA)
+		return;
+
+	if (!ifsta->bssid_set && !ifsta->auto_bssid_sel)
+		return;
+	if (!ifsta->ssid_set && !ifsta->auto_ssid_sel)
+		return;
+
+	set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
+	queue_work(local->hw.workqueue, &ifsta->work);
+}
+
+/*
+ * Decide whether the SSID advertised by a BSS matches the configured one.
+ *
+ * ifsta:    station state holding the configured SSID
+ * ssid:     SSID bytes taken from the BSS entry
+ * ssid_len: number of bytes in ssid
+ *
+ * Returns 1 on an exact match (same length and same bytes). When automatic
+ * BSSID selection is disabled, a BSS is also accepted if it hides its SSID:
+ * either all bytes are zero and the length equals the configured length, or
+ * the SSID is a single space. Returns 0 otherwise.
+ */
+static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta,
+				    const char *ssid, int ssid_len)
+{
+	int tmp, hidden_ssid;
+
+	/* The lengths must agree as well: comparing only ssid_len bytes
+	 * would accept any prefix of the configured SSID, including an
+	 * empty one. */
+	if (ssid_len == ifsta->ssid_len &&
+	    !memcmp(ifsta->ssid, ssid, ssid_len))
+		return 1;
+
+	if (ifsta->auto_bssid_sel)
+		return 0;
+
+	/* An SSID consisting only of NUL bytes is treated as hidden. */
+	hidden_ssid = 1;
+	tmp = ssid_len;
+	while (tmp--) {
+		if (ssid[tmp] != '\0') {
+			hidden_ssid = 0;
+			break;
+		}
+	}
+
+	if (hidden_ssid && ifsta->ssid_len == ssid_len)
+		return 1;
+
+	if (ssid_len == 1 && ssid[0] == ' ')
+		return 1;
+
+	return 0;
+}
+/*
+ * Choose the BSS to authenticate with and prepare the interface for it.
+ *
+ * With all automatic selection (channel, BSSID, SSID) disabled the current
+ * configuration is used as is. Otherwise the BSS table is searched for ESS
+ * entries whose privacy capability agrees with whether a default key is
+ * configured and which satisfy every non-automatic constraint; among those
+ * the one with the highest rssi is selected and its channel, SSID (if none
+ * is set) and BSSID are applied.
+ *
+ * Returns 0 when the state was set to AUTHENTICATE with a usable
+ * configuration. Returns -1 when no BSS matched: a scan is started and the
+ * authentication request re-raised on the first failure, and the state is
+ * set to DISABLED if it was already AUTHENTICATE.
+ * The whole selection runs under rtnl_lock().
+ */
+static int ieee80211_sta_config_auth(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_sta_bss *bss, *selected = NULL;
+ int top_rssi = 0, freq;
+
+ rtnl_lock();
+
+ if (!ifsta->auto_channel_sel && !ifsta->auto_bssid_sel &&
+ !ifsta->auto_ssid_sel) {
+ ifsta->state = IEEE80211_AUTHENTICATE;
+ rtnl_unlock();
+ ieee80211_sta_reset_auth(dev, ifsta);
+ return 0;
+ }
+
+ spin_lock_bh(&local->sta_bss_lock);
+ freq = local->oper_channel->freq;
+ list_for_each_entry(bss, &local->sta_bss_list, list) {
+ if (!(bss->capability & WLAN_CAPABILITY_ESS))
+ continue;
+
+ if (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
+ !!sdata->default_key) /* privacy bit and key presence must agree */
+ continue;
+
+ if (!ifsta->auto_channel_sel && bss->freq != freq)
+ continue;
+
+ if (!ifsta->auto_bssid_sel &&
+ memcmp(bss->bssid, ifsta->bssid, ETH_ALEN))
+ continue;
+
+ if (!ifsta->auto_ssid_sel &&
+ !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len))
+ continue;
+
+ if (!selected || top_rssi < bss->rssi) {
+ selected = bss;
+ top_rssi = bss->rssi;
+ }
+ }
+ if (selected)
+ atomic_inc(&selected->users); /* keep the entry alive after unlock */
+ spin_unlock_bh(&local->sta_bss_lock);
+
+ if (selected) {
+ ieee80211_set_channel(local, -1, selected->freq);
+ if (!ifsta->ssid_set)
+ ieee80211_sta_set_ssid(dev, selected->ssid,
+ selected->ssid_len);
+ ieee80211_sta_set_bssid(dev, selected->bssid);
+ ieee80211_rx_bss_put(dev, selected);
+ ifsta->state = IEEE80211_AUTHENTICATE;
+ rtnl_unlock();
+ ieee80211_sta_reset_auth(dev, ifsta);
+ return 0;
+ } else {
+ if (ifsta->state != IEEE80211_AUTHENTICATE) {
+ ieee80211_sta_start_scan(dev, NULL, 0);
+ ifsta->state = IEEE80211_AUTHENTICATE;
+ set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
+ } else
+ ifsta->state = IEEE80211_DISABLED;
+ }
+ rtnl_unlock();
+ return -1;
+}
+
+/*
+ * Join the IBSS described by bss.
+ *
+ * Flushes the station table, adopts the BSSID, beacon interval, privacy
+ * setting and channel of bss, builds a Beacon template (and a Probe
+ * Response template copied from it) from the BSS data and hands the beacon
+ * to the driver, then enters the IBSS_JOINED state and arms the merge timer.
+ *
+ * The caller's reference to bss is consumed on every path, including the
+ * error returns.
+ *
+ * Returns the error from ieee80211_if_config() if that fails, -1 if IBSS
+ * operation is not allowed on the selected channel, and otherwise the
+ * result of ieee80211_set_channel(). Failure to build or install the beacon
+ * template is only logged.
+ */
+static int ieee80211_sta_join_ibss(struct net_device *dev,
+				   struct ieee80211_if_sta *ifsta,
+				   struct ieee80211_sta_bss *bss)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	int res, rates, i, j;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+	struct ieee80211_tx_control control;
+	struct ieee80211_rate *rate;
+	struct ieee80211_hw_mode *mode;
+	struct rate_control_extra extra;
+	u8 *pos;
+	struct ieee80211_sub_if_data *sdata;
+
+	/* Remove possible STA entries from other IBSS networks. */
+	sta_info_flush(local, NULL);
+
+	if (local->ops->reset_tsf) {
+		/* Reset own TSF to allow time synchronization work. */
+		local->ops->reset_tsf(local_to_hw(local));
+	}
+	memcpy(ifsta->bssid, bss->bssid, ETH_ALEN);
+	res = ieee80211_if_config(dev);
+	if (res)
+		goto out_put;
+
+	/* Never configure a beacon interval below 10. */
+	local->hw.conf.beacon_int = bss->beacon_int >= 10 ? bss->beacon_int : 10;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	sdata->drop_unencrypted = bss->capability &
+		WLAN_CAPABILITY_PRIVACY ? 1 : 0;
+
+	res = ieee80211_set_channel(local, -1, bss->freq);
+
+	if (!(local->oper_channel->flag & IEEE80211_CHAN_W_IBSS)) {
+		printk(KERN_DEBUG "%s: IBSS not allowed on channel %d "
+		       "(%d MHz)\n", dev->name, local->hw.conf.channel,
+		       local->hw.conf.freq);
+		res = -1;
+		goto out_put;
+	}
+
+	/* Set beacon template based on scan results */
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
+	do {
+		if (!skb)
+			break;
+
+		skb_reserve(skb, local->hw.extra_tx_headroom);
+
+		mgmt = (struct ieee80211_mgmt *)
+			skb_put(skb, 24 + sizeof(mgmt->u.beacon));
+		memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
+		mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+						   IEEE80211_STYPE_BEACON);
+		memset(mgmt->da, 0xff, ETH_ALEN);
+		memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+		mgmt->u.beacon.beacon_int =
+			cpu_to_le16(local->hw.conf.beacon_int);
+		mgmt->u.beacon.capab_info = cpu_to_le16(bss->capability);
+
+		pos = skb_put(skb, 2 + ifsta->ssid_len);
+		*pos++ = WLAN_EID_SSID;
+		*pos++ = ifsta->ssid_len;
+		memcpy(pos, ifsta->ssid, ifsta->ssid_len);
+
+		/* The first eight rates go into the Supported Rates
+		 * element; the remainder follows in Extended Supported
+		 * Rates further down. */
+		rates = bss->supp_rates_len;
+		if (rates > 8)
+			rates = 8;
+		pos = skb_put(skb, 2 + rates);
+		*pos++ = WLAN_EID_SUPP_RATES;
+		*pos++ = rates;
+		memcpy(pos, bss->supp_rates, rates);
+
+		pos = skb_put(skb, 2 + 1);
+		*pos++ = WLAN_EID_DS_PARAMS;
+		*pos++ = 1;
+		*pos++ = bss->channel;
+
+		pos = skb_put(skb, 2 + 2);
+		*pos++ = WLAN_EID_IBSS_PARAMS;
+		*pos++ = 2;
+		/* FIX: set ATIM window based on scan results */
+		*pos++ = 0;
+		*pos++ = 0;
+
+		if (bss->supp_rates_len > 8) {
+			rates = bss->supp_rates_len - 8;
+			pos = skb_put(skb, 2 + rates);
+			*pos++ = WLAN_EID_EXT_SUPP_RATES;
+			*pos++ = rates;
+			memcpy(pos, &bss->supp_rates[8], rates);
+		}
+
+		memset(&control, 0, sizeof(control));
+		memset(&extra, 0, sizeof(extra));
+		extra.mode = local->oper_hw_mode;
+		rate = rate_control_get_rate(local, dev, skb, &extra);
+		if (!rate) {
+			printk(KERN_DEBUG "%s: Failed to determine TX rate "
+			       "for IBSS beacon\n", dev->name);
+			break;
+		}
+		control.tx_rate = (local->short_preamble &&
+				   (rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
+			rate->val2 : rate->val;
+		control.antenna_sel_tx = local->hw.conf.antenna_sel_tx;
+		control.power_level = local->hw.conf.power_level;
+		control.flags |= IEEE80211_TXCTL_NO_ACK;
+		control.retry_limit = 1;
+
+		/* The Probe Response template is the beacon with a
+		 * different frame type. */
+		ifsta->probe_resp = skb_copy(skb, GFP_ATOMIC);
+		if (ifsta->probe_resp) {
+			mgmt = (struct ieee80211_mgmt *)
+				ifsta->probe_resp->data;
+			mgmt->frame_control =
+				IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+					     IEEE80211_STYPE_PROBE_RESP);
+		} else {
+			printk(KERN_DEBUG "%s: Could not allocate ProbeResp "
+			       "template for IBSS\n", dev->name);
+		}
+
+		/* On success the skb is no longer freed here. */
+		if (local->ops->beacon_update &&
+		    local->ops->beacon_update(local_to_hw(local),
+					      skb, &control) == 0) {
+			printk(KERN_DEBUG "%s: Configured IBSS beacon "
+			       "template based on scan results\n", dev->name);
+			skb = NULL;
+		}
+
+		rates = 0;
+		mode = local->oper_hw_mode;
+		for (i = 0; i < bss->supp_rates_len; i++) {
+			int bitrate = (bss->supp_rates[i] & 0x7f) * 5;
+			if (mode->mode == MODE_ATHEROS_TURBO)
+				bitrate *= 2;
+			for (j = 0; j < mode->num_rates; j++)
+				if (mode->rates[j].rate == bitrate)
+					rates |= BIT(j);
+		}
+		ifsta->supp_rates_bits = rates;
+	} while (0);
+
+	if (skb) {
+		printk(KERN_DEBUG "%s: Failed to configure IBSS beacon "
+		       "template\n", dev->name);
+		dev_kfree_skb(skb);
+	}
+
+	ifsta->state = IEEE80211_IBSS_JOINED;
+	mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
+
+ out_put:
+	/* Release the caller's reference on success and on error alike. */
+	ieee80211_rx_bss_put(dev, bss);
+
+	return res;
+}
+
+/*
+ * Create a new IBSS network and join it.
+ *
+ * Generates a BSSID, adds a BSS table entry for it, fills the entry from
+ * the current hardware configuration (beacon interval, phymode, channel,
+ * frequency) and from the rate table of the operating mode, then calls
+ * ieee80211_sta_join_ibss() with that entry.
+ *
+ * Returns -ENOMEM if the BSS entry cannot be allocated, otherwise the
+ * result of ieee80211_sta_join_ibss().
+ */
+static int ieee80211_sta_create_ibss(struct net_device *dev,
+ struct ieee80211_if_sta *ifsta)
+{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sta_bss *bss;
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_hw_mode *mode;
+ u8 bssid[ETH_ALEN], *pos;
+ int i;
+
+#if 0
+ /* Easier testing, use fixed BSSID. */
+ memset(bssid, 0xfe, ETH_ALEN);
+#else
+ /* Generate random, not broadcast, locally administered BSSID. Mix in
+ * own MAC address to make sure that devices that do not have proper
+ * random number generator get different BSSID. */
+ get_random_bytes(bssid, ETH_ALEN);
+ for (i = 0; i < ETH_ALEN; i++)
+ bssid[i] ^= dev->dev_addr[i];
+ bssid[0] &= ~0x01; /* clear the group address bit */
+ bssid[0] |= 0x02; /* set the locally administered bit */
+#endif
+
+ printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID " MAC_FMT "\n",
+ dev->name, MAC_ARG(bssid));
+
+ bss = ieee80211_rx_bss_add(dev, bssid);
+ if (!bss)
+ return -ENOMEM;
+
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ mode = local->oper_hw_mode;
+
+ if (local->hw.conf.beacon_int == 0)
+ local->hw.conf.beacon_int = 100; /* default when none is configured */
+ bss->beacon_int = local->hw.conf.beacon_int;
+ bss->hw_mode = local->hw.conf.phymode;
+ bss->channel = local->hw.conf.channel;
+ bss->freq = local->hw.conf.freq;
+ bss->last_update = jiffies;
+ bss->capability = WLAN_CAPABILITY_IBSS;
+ if (sdata->default_key) {
+ bss->capability |= WLAN_CAPABILITY_PRIVACY;
+ } else
+ sdata->drop_unencrypted = 0;
+ bss->supp_rates_len = mode->num_rates; /* NOTE(review): not bounded here;
+ * ieee80211_rx_bss_info() caps the same array at
+ * IEEE80211_MAX_SUPP_RATES - confirm num_rates cannot exceed it */
+ pos = bss->supp_rates;
+ for (i = 0; i < mode->num_rates; i++) {
+ int rate = mode->rates[i].rate;
+ if (mode->mode == MODE_ATHEROS_TURBO)
+ rate /= 2;
+ *pos++ = (u8) (rate / 5); /* inverse of the (value & 0x7f) * 5
+ * conversion applied to received rate elements */
+ }
+
+ return ieee80211_sta_join_ibss(dev, ifsta, bss);
+}
+
+
+/*
+ * Try to find an IBSS matching the configured SSID and join it.
+ *
+ * The cached BSS list is searched first. If a matching IBSS with a BSSID
+ * different from the current one is found, it is joined. Otherwise the
+ * function either re-arms the merge timer, requests a new scan, creates a
+ * new IBSS (when allowed and the join timeout has expired) or re-arms the
+ * search timer.
+ *
+ * Returns -EINVAL when no SSID is configured, the result of the
+ * join/create/scan helper that was invoked, or 0 when only a timer was
+ * (re)armed or nothing had to be done.
+ */
+static int ieee80211_sta_find_ibss(struct net_device *dev,
+				   struct ieee80211_if_sta *ifsta)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sta_bss *bss;
+	int found = 0;
+	u8 bssid[ETH_ALEN];
+	int active_ibss;
+
+	if (ifsta->ssid_len == 0)
+		return -EINVAL;
+
+	active_ibss = ieee80211_sta_active_ibss(dev);
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+	printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n",
+	       dev->name, active_ibss);
+#endif /* CONFIG_MAC80211_IBSS_DEBUG */
+	spin_lock_bh(&local->sta_bss_lock);
+	list_for_each_entry(bss, &local->sta_bss_list, list) {
+		/* Only IBSS entries carrying exactly our SSID qualify. */
+		if (ifsta->ssid_len != bss->ssid_len ||
+		    memcmp(ifsta->ssid, bss->ssid, bss->ssid_len) != 0
+		    || !(bss->capability & WLAN_CAPABILITY_IBSS))
+			continue;
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+		printk(KERN_DEBUG " bssid=" MAC_FMT " found\n",
+		       MAC_ARG(bss->bssid));
+#endif /* CONFIG_MAC80211_IBSS_DEBUG */
+		memcpy(bssid, bss->bssid, ETH_ALEN);
+		found = 1;
+		/* Stop at the first match if our IBSS is active; otherwise
+		 * keep going until a BSSID other than the current one turns
+		 * up (the last match is kept if none does). */
+		if (active_ibss || memcmp(bssid, ifsta->bssid, ETH_ALEN) != 0)
+			break;
+	}
+	spin_unlock_bh(&local->sta_bss_lock);
+
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+	/* bssid[] holds valid data only after a match was copied into it;
+	 * printing it unconditionally would log uninitialised stack bytes. */
+	if (found)
+		printk(KERN_DEBUG " sta_find_ibss: selected " MAC_FMT " current "
+		       MAC_FMT "\n", MAC_ARG(bssid), MAC_ARG(ifsta->bssid));
+#endif /* CONFIG_MAC80211_IBSS_DEBUG */
+	if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 &&
+	    (bss = ieee80211_rx_bss_get(dev, bssid))) {
+		printk(KERN_DEBUG "%s: Selected IBSS BSSID " MAC_FMT
+		       " based on configured SSID\n",
+		       dev->name, MAC_ARG(bssid));
+		return ieee80211_sta_join_ibss(dev, ifsta, bss);
+	}
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+	printk(KERN_DEBUG " did not try to join ibss\n");
+#endif /* CONFIG_MAC80211_IBSS_DEBUG */
+
+	/* Selected IBSS not found in current scan results - try to scan */
+	if (ifsta->state == IEEE80211_IBSS_JOINED &&
+	    !ieee80211_sta_active_ibss(dev)) {
+		/* Joined but currently inactive: check again after the
+		 * merge interval. */
+		mod_timer(&ifsta->timer, jiffies +
+			  IEEE80211_IBSS_MERGE_INTERVAL);
+	} else if (time_after(jiffies, local->last_scan_completed +
+			      IEEE80211_SCAN_INTERVAL)) {
+		printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
+		       "join\n", dev->name);
+		return ieee80211_sta_req_scan(dev, ifsta->ssid,
+					      ifsta->ssid_len);
+	} else if (ifsta->state != IEEE80211_IBSS_JOINED) {
+		int interval = IEEE80211_SCAN_INTERVAL;
+
+		if (time_after(jiffies, ifsta->ibss_join_req +
+			       IEEE80211_IBSS_JOIN_TIMEOUT)) {
+			if (ifsta->create_ibss &&
+			    local->oper_channel->flag & IEEE80211_CHAN_W_IBSS)
+				return ieee80211_sta_create_ibss(dev, ifsta);
+			if (ifsta->create_ibss) {
+				printk(KERN_DEBUG "%s: IBSS not allowed on the"
+				       " configured channel %d (%d MHz)\n",
+				       dev->name, local->hw.conf.channel,
+				       local->hw.conf.freq);
+			}
+
+			/* No IBSS found - switch to the slow scan interval
+			 * and continue scanning. */
+			interval = IEEE80211_SCAN_INTERVAL_SLOW;
+		}
+
+		ifsta->state = IEEE80211_IBSS_SEARCH;
+		mod_timer(&ifsta->timer, jiffies + interval);
+	}
+
+	return 0;
+}
+
+
+/*
+ * Configure the SSID of a STA/IBSS interface.
+ *
+ * @dev:  interface to configure
+ * @ssid: new SSID bytes (not NUL-terminated)
+ * @len:  number of SSID bytes, at most IEEE80211_MAX_SSID_LEN
+ *
+ * If the driver implements conf_tx, default TX queue parameters are
+ * programmed first. The SSID is then stored zero-padded. For an IBSS
+ * interface without a fixed BSSID an IBSS search is started right away.
+ *
+ * Returns -EINVAL for an over-long SSID, the result of
+ * ieee80211_sta_find_ibss() when a search was started, and 0 otherwise.
+ */
+int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_sta *ifsta;
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	if (len > IEEE80211_MAX_SSID_LEN)
+		return -EINVAL;
+
+	/* TODO: This should always be done for IBSS, even if IEEE80211_QOS is
+	 * not defined. */
+	if (local->ops->conf_tx) {
+		struct ieee80211_tx_queue_params qparam;
+		int i;
+
+		memset(&qparam, 0, sizeof(qparam));
+		/* TODO: are these ok defaults for all hw_modes? */
+		qparam.aifs = 2;
+		qparam.cw_min =
+			local->hw.conf.phymode == MODE_IEEE80211B ? 31 : 15;
+		qparam.cw_max = 1023;
+		qparam.burst_time = 0;
+		/* i counts data queues from zero and the base queue id is
+		 * added exactly once. The previous form used the base both
+		 * as loop start and as offset, which is only right while
+		 * IEEE80211_TX_QUEUE_DATA0 is 0. */
+		for (i = 0; i < NUM_TX_DATA_QUEUES; i++) {
+			local->ops->conf_tx(local_to_hw(local),
+					    IEEE80211_TX_QUEUE_DATA0 + i,
+					    &qparam);
+		}
+		/* IBSS uses different parameters for Beacon sending:
+		 * cw_min becomes 2 * (cw_min + 1) - 1. */
+		qparam.cw_min++;
+		qparam.cw_min *= 2;
+		qparam.cw_min--;
+		local->ops->conf_tx(local_to_hw(local),
+				    IEEE80211_TX_QUEUE_BEACON, &qparam);
+	}
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	ifsta = &sdata->u.sta;
+
+	/* A changed SSID invalidates the remembered previous BSSID. */
+	if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0)
+		ifsta->prev_bssid_set = 0;
+	memcpy(ifsta->ssid, ssid, len);
+	memset(ifsta->ssid + len, 0, IEEE80211_MAX_SSID_LEN - len);
+	ifsta->ssid_len = len;
+
+	ifsta->ssid_set = len ? 1 : 0;
+	if (sdata->type == IEEE80211_IF_TYPE_IBSS && !ifsta->bssid_set) {
+		ifsta->ibss_join_req = jiffies;
+		ifsta->state = IEEE80211_IBSS_SEARCH;
+		return ieee80211_sta_find_ibss(dev, ifsta);
+	}
+	return 0;
+}
+
+
+/*
+ * Copy the configured SSID of @dev into @ssid and store its length in @len.
+ * The caller's buffer must be able to hold the stored SSID. Always
+ * returns 0.
+ */
+int ieee80211_sta_get_ssid(struct net_device *dev, char *ssid, size_t *len)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_sta *sta_cfg;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	sta_cfg = &sdata->u.sta;
+
+	memcpy(ssid, sta_cfg->ssid, sta_cfg->ssid_len);
+	*len = sta_cfg->ssid_len;
+
+	return 0;
+}
+
+
+/*
+ * Set the BSSID of a STA/IBSS interface.
+ *
+ * A BSSID that differs from the stored one is saved and pushed to the
+ * low-level driver through ieee80211_if_config(); on failure that error is
+ * returned (the new BSSID stays stored). bssid_set then records whether
+ * @bssid is a valid unicast Ethernet address. Returns 0 on success.
+ */
+int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+
+	if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) {
+		int err;
+
+		memcpy(ifsta->bssid, bssid, ETH_ALEN);
+		err = ieee80211_if_config(dev);
+		if (err) {
+			printk(KERN_DEBUG "%s: Failed to config new BSSID to "
+			       "the low-level driver\n", dev->name);
+			return err;
+		}
+	}
+
+	ifsta->bssid_set = is_valid_ether_addr(bssid) ? 1 : 0;
+
+	return 0;
+}
+
+
+/*
+ * Build and transmit a 24-byte data/nullfunc frame (ToDS set) from @sdata
+ * to its BSSID. A non-zero @powersave additionally sets the PM bit in the
+ * frame control field. An allocation failure is logged and the frame is
+ * dropped.
+ */
+static void ieee80211_send_nullfunc(struct ieee80211_local *local,
+				    struct ieee80211_sub_if_data *sdata,
+				    int powersave)
+{
+	u16 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
+		 IEEE80211_FCTL_TODS;
+	struct ieee80211_hdr *hdr;
+	struct sk_buff *skb;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24);
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc "
+		       "frame\n", sdata->dev->name);
+		return;
+	}
+	/* Leave the headroom requested by the driver in front of the frame. */
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	hdr = (struct ieee80211_hdr *) skb_put(skb, 24);
+	memset(hdr, 0, 24);
+
+	if (powersave)
+		fc |= IEEE80211_FCTL_PM;
+	hdr->frame_control = cpu_to_le16(fc);
+
+	/* addr1 and addr3 carry the BSSID, addr2 our own address. */
+	memcpy(hdr->addr1, sdata->u.sta.bssid, ETH_ALEN);
+	memcpy(hdr->addr2, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(hdr->addr3, sdata->u.sta.bssid, ETH_ALEN);
+
+	ieee80211_sta_tx(sdata->dev, skb, 0);
+}
+
+
+/*
+ * Finish a scan: called by the software scan state machine when the last
+ * channel has been visited, and exported for use outside this file.
+ *
+ * Clears the scanning flag, restores the operational channel (and the BSSID
+ * unless the hardware does no probe filtering), emits the SIOCGIWSCAN
+ * wireless event, wakes the TX queues of all interfaces, and kicks the STA
+ * timer handler of every STA interface (after a nullfunc frame without the
+ * PM bit if it is associated). If the scanning interface is in IBSS mode
+ * and has no fixed BSSID, or is neither joined nor part of an active IBSS,
+ * a new IBSS search is started.
+ */
+void ieee80211_scan_completed(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct net_device *dev = local->scan_dev;
+	struct ieee80211_sub_if_data *sdata;
+	union iwreq_data wrqu;
+
+	/* Publish the completion time before the flag is cleared. */
+	local->last_scan_completed = jiffies;
+	wmb();
+	local->sta_scanning = 0;
+
+	if (ieee80211_hw_config(local))
+		printk(KERN_DEBUG "%s: failed to restore operational "
+		       "channel after scan\n", dev->name);
+
+	if (!(local->hw.flags & IEEE80211_HW_NO_PROBE_FILTERING) &&
+	    ieee80211_if_config(dev))
+		printk(KERN_DEBUG "%s: failed to restore operational "
+		       "BSSID after scan\n", dev->name);
+
+	memset(&wrqu, 0, sizeof(wrqu));
+	wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
+
+	read_lock(&local->sub_if_lock);
+	list_for_each_entry(sdata, &local->sub_if_list, list) {
+		if (sdata->type == IEEE80211_IF_TYPE_STA) {
+			if (sdata->u.sta.associated)
+				ieee80211_send_nullfunc(local, sdata, 0);
+			ieee80211_sta_timer((unsigned long)sdata);
+		}
+		netif_wake_queue(sdata->dev);
+	}
+	read_unlock(&local->sub_if_lock);
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->type == IEEE80211_IF_TYPE_IBSS) {
+		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+
+		/* The comparison must be on the state itself. The earlier
+		 * "!ifsta->state == IEEE80211_IBSS_JOINED" negated the state
+		 * first and compared the resulting 0/1 with the constant. */
+		if (!ifsta->bssid_set ||
+		    (ifsta->state != IEEE80211_IBSS_JOINED &&
+		     !ieee80211_sta_active_ibss(dev)))
+			ieee80211_sta_find_ibss(dev, ifsta);
+	}
+}
+EXPORT_SYMBOL(ieee80211_scan_completed);
+
+void ieee80211_sta_scan_work(struct work_struct *work)
+{ /* Delayed-work handler driving the software scan. Every invocation
+ * performs one step of a two-state machine (SCAN_SET_CHANNEL ->
+ * SCAN_SEND_PROBE -> SCAN_SET_CHANNEL ...) and, while sta_scanning is
+ * still set, re-queues itself with the delay chosen by that step. */
+ struct ieee80211_local *local =
+ container_of(work, struct ieee80211_local, scan_work.work);
+ struct net_device *dev = local->scan_dev;
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_hw_mode *mode;
+ struct ieee80211_channel *chan;
+ int skip;
+ unsigned long next_delay = 0; /* 0 = run the next step immediately */
+
+ if (!local->sta_scanning) /* nothing to do when no scan is in progress */
+ return;
+
+ switch (local->scan_state) {
+ case SCAN_SET_CHANNEL:
+ mode = local->scan_hw_mode;
+ if (local->scan_hw_mode->list.next == &local->modes_list &&
+ local->scan_channel_idx >= mode->num_channels) { /* last mode, all of its channels visited */
+ ieee80211_scan_completed(local_to_hw(local));
+ return;
+ }
+ skip = !(local->enabled_modes & (1 << mode->mode)); /* mode not enabled */
+ chan = &mode->channels[local->scan_channel_idx];
+ if (!(chan->flag & IEEE80211_CHAN_W_SCAN) ||
+ (sdata->type == IEEE80211_IF_TYPE_IBSS &&
+ !(chan->flag & IEEE80211_CHAN_W_IBSS)) ||
+ (local->hw_modes & local->enabled_modes &
+ (1 << MODE_IEEE80211G) && mode->mode == MODE_IEEE80211B)) /* also skip: scanning not allowed on the channel, IBSS interface on a channel without the IBSS flag, or an 802.11b mode entry while 802.11g is both supported and enabled */
+ skip = 1;
+
+ if (!skip) {
+#if 0
+ printk(KERN_DEBUG "%s: scan channel %d (%d MHz)\n",
+ dev->name, chan->chan, chan->freq);
+#endif
+
+ local->scan_channel = chan;
+ if (ieee80211_hw_config(local)) { /* a channel that cannot be set is skipped */
+ printk(KERN_DEBUG "%s: failed to set channel "
+ "%d (%d MHz) for scan\n", dev->name,
+ chan->chan, chan->freq);
+ skip = 1;
+ }
+ }
+
+ local->scan_channel_idx++; /* advance the cursor whether or not this channel was skipped */
+ if (local->scan_channel_idx >= local->scan_hw_mode->num_channels) {
+ if (local->scan_hw_mode->list.next != &local->modes_list) { /* more modes left: continue with the first channel of the next one; on the last mode the index stays past the end so the next pass completes the scan */
+ local->scan_hw_mode = list_entry(local->scan_hw_mode->list.next,
+ struct ieee80211_hw_mode,
+ list);
+ local->scan_channel_idx = 0;
+ }
+ }
+
+ if (skip) /* state and next_delay unchanged: the next channel is tried at once */
+ break;
+
+ next_delay = IEEE80211_PROBE_DELAY +
+ usecs_to_jiffies(local->hw.channel_change_time);
+ local->scan_state = SCAN_SEND_PROBE;
+ break;
+ case SCAN_SEND_PROBE:
+ if (local->scan_channel->flag & IEEE80211_CHAN_W_ACTIVE_SCAN) { /* active scan: send a probe request, then dwell */
+ ieee80211_send_probe_req(dev, NULL, local->scan_ssid,
+ local->scan_ssid_len);
+ next_delay = IEEE80211_CHANNEL_TIME;
+ } else /* passive scan: only listen on the channel */
+ next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
+ local->scan_state = SCAN_SET_CHANNEL;
+ break;
+ }
+
+ if (local->sta_scanning) /* still scanning: schedule the next step */
+ queue_delayed_work(local->hw.workqueue, &local->scan_work,
+ next_delay);
+}
+
+
+static int ieee80211_sta_start_scan(struct net_device *dev,
+ u8 *ssid, size_t ssid_len)
+{ /* Start a scan on behalf of dev, optionally restricted to ssid.
+ * Returns -EINVAL for an over-long SSID; while a scan is already running
+ * it returns 0 if dev owns that scan and -EBUSY otherwise. If the driver
+ * implements hw_scan the request is handed to it and its result is
+ * returned; otherwise the software scan is set up and 0 is returned. */
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct ieee80211_sub_if_data *sdata;
+
+ if (ssid_len > IEEE80211_MAX_SSID_LEN)
+ return -EINVAL;
+
+ /* MLME-SCAN.request (page 118) page 144 (11.1.3.1)
+ * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS
+ * BSSID: MACAddress
+ * SSID
+ * ScanType: ACTIVE, PASSIVE
+ * ProbeDelay: delay (in microseconds) to be used prior to transmitting
+ * a Probe frame during active scanning
+ * ChannelList
+ * MinChannelTime (>= ProbeDelay), in TU
+ * MaxChannelTime: (>= MinChannelTime), in TU
+ */
+
+ /* MLME-SCAN.confirm
+ * BSSDescriptionSet
+ * ResultCode: SUCCESS, INVALID_PARAMETERS
+ */
+
+ if (local->sta_scanning) { /* NOTE(review): flag is tested and set without a lock in this function - confirm callers serialise */
+ if (local->scan_dev == dev)
+ return 0;
+ return -EBUSY;
+ }
+
+ if (local->ops->hw_scan) { /* hardware scan offload */
+ int rc = local->ops->hw_scan(local_to_hw(local),
+ ssid, ssid_len);
+ if (!rc) { /* mark the scan as running only if the driver accepted it */
+ local->sta_scanning = 1;
+ local->scan_dev = dev;
+ }
+ return rc;
+ }
+
+ local->sta_scanning = 1;
+
+ read_lock(&local->sub_if_lock);
+ list_for_each_entry(sdata, &local->sub_if_list, list) {
+ netif_stop_queue(sdata->dev); /* no data TX on any interface during the scan */
+ if (sdata->type == IEEE80211_IF_TYPE_STA &&
+ sdata->u.sta.associated)
+ ieee80211_send_nullfunc(local, sdata, 1); /* nullfunc with the PM bit set */
+ }
+ read_unlock(&local->sub_if_lock);
+
+ if (ssid) {
+ local->scan_ssid_len = ssid_len;
+ memcpy(local->scan_ssid, ssid, ssid_len);
+ } else /* no SSID given: empty scan SSID */
+ local->scan_ssid_len = 0;
+ local->scan_state = SCAN_SET_CHANNEL;
+ local->scan_hw_mode = list_entry(local->modes_list.next,
+ struct ieee80211_hw_mode,
+ list); /* begin with the first registered mode, channel index 0 */
+ local->scan_channel_idx = 0;
+ local->scan_dev = dev;
+
+ if (!(local->hw.flags & IEEE80211_HW_NO_PROBE_FILTERING) &&
+ ieee80211_if_config(dev))
+ printk(KERN_DEBUG "%s: failed to set BSSID for scan\n",
+ dev->name);
+
+ /* TODO: start scan as soon as all nullfunc frames are ACKed */
+ queue_delayed_work(local->hw.workqueue, &local->scan_work,
+ IEEE80211_CHANNEL_TIME); /* first scan_work step runs after this delay */
+
+ return 0;
+}
+
+
+/*
+ * Request a scan on @dev.
+ *
+ * Interfaces that are not of STA type start the scan directly. For a STA
+ * interface the request is recorded as a flag and handed to the interface's
+ * work item, unless a scan is already running: then 0 is returned if @dev
+ * owns it and -EBUSY if another device does.
+ */
+int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+
+	if (sdata->type != IEEE80211_IF_TYPE_STA)
+		return ieee80211_sta_start_scan(dev, ssid, ssid_len);
+
+	if (local->sta_scanning)
+		return local->scan_dev == dev ? 0 : -EBUSY;
+
+	set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request);
+	queue_work(local->hw.workqueue, &ifsta->work);
+
+	return 0;
+}
+
+static char *
+ieee80211_sta_scan_result(struct net_device *dev,
+ struct ieee80211_sta_bss *bss,
+ char *current_ev, char *end_buf)
+{ /* Append the wireless-extensions events describing one cached BSS
+ * entry to the event stream at current_ev (bounded by end_buf) and
+ * return the new end of the stream. Entries that are expired or filtered
+ * out return current_ev unchanged. */
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ struct iw_event iwe;
+
+ if (time_after(jiffies,
+ bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE)) /* entry too old */
+ return current_ev;
+
+ if (!(local->enabled_modes & (1 << bss->hw_mode))) /* BSS seen in a mode that is not enabled */
+ return current_ev;
+
+ if (local->scan_flags & IEEE80211_SCAN_WPA_ONLY &&
+ !bss->wpa_ie && !bss->rsn_ie) /* WPA-only filter: needs a WPA or RSN IE */
+ return current_ev;
+
+ if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID &&
+ (local->scan_ssid_len != bss->ssid_len ||
+ memcmp(local->scan_ssid, bss->ssid, bss->ssid_len) != 0)) /* SSID filter: must equal the scanned SSID */
+ return current_ev;
+
+ memset(&iwe, 0, sizeof(iwe)); /* event: BSSID */
+ iwe.cmd = SIOCGIWAP;
+ iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
+ memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN);
+ current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+ IW_EV_ADDR_LEN);
+
+ memset(&iwe, 0, sizeof(iwe)); /* event: SSID */
+ iwe.cmd = SIOCGIWESSID;
+ iwe.u.data.length = bss->ssid_len;
+ iwe.u.data.flags = 1;
+ current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
+ bss->ssid);
+
+ if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)) { /* event: mode, only when ESS or IBSS is advertised */
+ memset(&iwe, 0, sizeof(iwe));
+ iwe.cmd = SIOCGIWMODE;
+ if (bss->capability & WLAN_CAPABILITY_ESS)
+ iwe.u.mode = IW_MODE_MASTER;
+ else
+ iwe.u.mode = IW_MODE_ADHOC;
+ current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+ IW_EV_UINT_LEN);
+ }
+
+ memset(&iwe, 0, sizeof(iwe)); /* two SIOCGIWFREQ events: channel number first, then frequency */
+ iwe.cmd = SIOCGIWFREQ;
+ iwe.u.freq.m = bss->channel;
+ iwe.u.freq.e = 0;
+ current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+ IW_EV_FREQ_LEN);
+ iwe.u.freq.m = bss->freq * 100000; /* iwe is reused, so cmd is still SIOCGIWFREQ; presumably freq is in MHz and m * 10^e yields Hz - TODO confirm */
+ iwe.u.freq.e = 1;
+ current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+ IW_EV_FREQ_LEN);
+
+ memset(&iwe, 0, sizeof(iwe)); /* event: link quality figures stored for this BSS */
+ iwe.cmd = IWEVQUAL;
+ iwe.u.qual.qual = bss->signal;
+ iwe.u.qual.level = bss->rssi;
+ iwe.u.qual.noise = bss->noise;
+ iwe.u.qual.updated = local->wstats_flags;
+ current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+ IW_EV_QUAL_LEN);
+
+ memset(&iwe, 0, sizeof(iwe)); /* event: encryption on/off from the privacy capability bit; no key material is reported */
+ iwe.cmd = SIOCGIWENCODE;
+ if (bss->capability & WLAN_CAPABILITY_PRIVACY)
+ iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
+ else
+ iwe.u.data.flags = IW_ENCODE_DISABLED;
+ iwe.u.data.length = 0;
+ current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, "");
+
+ if (bss && bss->wpa_ie) { /* NOTE(review): the "bss &&" tests from here on are redundant - bss was already dereferenced above */
+ memset(&iwe, 0, sizeof(iwe));
+ iwe.cmd = IWEVGENIE;
+ iwe.u.data.length = bss->wpa_ie_len;
+ current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
+ bss->wpa_ie);
+ }
+
+ if (bss && bss->rsn_ie) { /* event: raw RSN IE */
+ memset(&iwe, 0, sizeof(iwe));
+ iwe.cmd = IWEVGENIE;
+ iwe.u.data.length = bss->rsn_ie_len;
+ current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
+ bss->rsn_ie);
+ }
+
+ if (bss && bss->supp_rates_len > 0) {
+ /* display all supported rates in readable format */
+ char *p = current_ev + IW_EV_LCP_LEN;
+ int i;
+
+ memset(&iwe, 0, sizeof(iwe));
+ iwe.cmd = SIOCGIWRATE;
+ /* Those two flags are ignored... */
+ iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
+
+ for (i = 0; i < bss->supp_rates_len; i++) {
+ iwe.u.bitrate.value = ((bss->supp_rates[i] &
+ 0x7f) * 500000); /* top bit masked off; the remaining value is scaled by 500000 */
+ p = iwe_stream_add_value(current_ev, p,
+ end_buf, &iwe, IW_EV_PARAM_LEN);
+ }
+ current_ev = p;
+ }
+
+ if (bss) {
+ char *buf;
+ buf = kmalloc(30, GFP_ATOMIC); /* "tsf=" + 16 hex digits + NUL = 21 bytes, fits in 30 */
+ if (buf) { /* on allocation failure the tsf event is silently omitted */
+ memset(&iwe, 0, sizeof(iwe));
+ iwe.cmd = IWEVCUSTOM;
+ sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp));
+ iwe.u.data.length = strlen(buf);
+ current_ev = iwe_stream_add_point(current_ev, end_buf,
+ &iwe, buf);
+ kfree(buf);
+ }
+ }
+
+ do { /* optional extra custom events; do/while(0) only provides "break" as an early exit */
+ char *buf;
+
+ if (!(local->scan_flags & IEEE80211_SCAN_EXTRA_INFO))
+ break;
+
+ buf = kmalloc(100, GFP_ATOMIC);
+ if (!buf)
+ break;
+
+ memset(&iwe, 0, sizeof(iwe));
+ iwe.cmd = IWEVCUSTOM;
+ sprintf(buf, "bcn_int=%d", bss->beacon_int);
+ iwe.u.data.length = strlen(buf);
+ current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
+ buf);
+
+ memset(&iwe, 0, sizeof(iwe));
+ iwe.cmd = IWEVCUSTOM;
+ sprintf(buf, "capab=0x%04x", bss->capability);
+ iwe.u.data.length = strlen(buf);
+ current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
+ buf);
+
+ kfree(buf);
+ break;
+ } while (0);
+
+ return current_ev;
+}
+
+
+/*
+ * Serialise all cached BSS entries into @buf (at most @len bytes) as a
+ * wireless-extensions event stream.
+ *
+ * Returns the number of bytes written, or -E2BIG when, before handling an
+ * entry, no more than IW_EV_ADDR_LEN bytes are left in the buffer.
+ */
+int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sta_bss *bss;
+	char *end_buf = buf + len;
+	char *ev = buf;
+	int ret;
+
+	spin_lock_bh(&local->sta_bss_lock);
+	list_for_each_entry(bss, &local->sta_bss_list, list) {
+		if (end_buf - ev <= IW_EV_ADDR_LEN) {
+			ret = -E2BIG;
+			goto unlock;
+		}
+		ev = ieee80211_sta_scan_result(dev, bss, ev, end_buf);
+	}
+	ret = ev - buf;
+unlock:
+	spin_unlock_bh(&local->sta_bss_lock);
+	return ret;
+}
+
+
+/*
+ * Replace the extra information elements stored for @dev with a private
+ * copy of the @len bytes at @ie. A @len of 0 just clears them.
+ *
+ * The previous buffer is always freed first, so after an allocation
+ * failure (-ENOMEM) no extra IEs are stored at all. Returns 0 on success.
+ */
+int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+
+	kfree(ifsta->extra_ie);
+	ifsta->extra_ie = len ? kmalloc(len, GFP_KERNEL) : NULL;
+
+	if (!ifsta->extra_ie) {
+		/* Either nothing was requested or the allocation failed. */
+		ifsta->extra_ie_len = 0;
+		return len ? -ENOMEM : 0;
+	}
+
+	memcpy(ifsta->extra_ie, ie, len);
+	ifsta->extra_ie_len = len;
+
+	return 0;
+}
+
+
+/*
+ * Create a station entry for a newly seen IBSS peer.
+ *
+ * @dev:   interface the peer was seen on
+ * @skb:   frame that triggered the addition (not used here)
+ * @bssid: BSSID of the IBSS (not used here)
+ * @addr:  MAC address of the peer
+ *
+ * Returns the new entry, or NULL when the station table already holds
+ * IEEE80211_IBSS_MAX_STA_ENTRIES entries or sta_info_add() fails. The
+ * caller owns the returned reference and must call sta_info_put().
+ */
+struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev,
+					 struct sk_buff *skb, u8 *bssid,
+					 u8 *addr)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+	/* Must point at the interface data: it is dereferenced below for the
+	 * supported-rates bitmap (it used to be initialised to NULL). */
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	/* TODO: Could consider removing the least recently used entry and
+	 * allow new one to be added. */
+	if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "%s: No room for a new IBSS STA "
+			       "entry " MAC_FMT "\n", dev->name, MAC_ARG(addr));
+		}
+		return NULL;
+	}
+
+	printk(KERN_DEBUG "%s: Adding new IBSS station " MAC_FMT " (dev=%s)\n",
+	       local->mdev->name, MAC_ARG(addr), dev->name);
+
+	sta = sta_info_add(local, dev, addr, GFP_ATOMIC);
+	if (!sta)
+		return NULL;
+
+	/* The peer starts out with the rate set recorded for our own
+	 * interface. */
+	sta->supp_rates = sdata->u.sta.supp_rates_bits;
+
+	rate_control_rate_init(sta, local);
+
+	return sta; /* caller will call sta_info_put() */
+}
+
+
+/*
+ * Send a deauthentication frame carrying @reason and mark the interface as
+ * disassociated. Only valid for STA and IBSS interfaces; any other type
+ * yields -EINVAL. Returns 0 otherwise.
+ */
+int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+
+	printk(KERN_DEBUG "%s: deauthenticate(reason=%d)\n",
+	       dev->name, reason);
+
+	switch (sdata->type) {
+	case IEEE80211_IF_TYPE_STA:
+	case IEEE80211_IF_TYPE_IBSS:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ieee80211_send_deauth(dev, ifsta, reason);
+	ieee80211_set_disassoc(dev, ifsta, 1);
+
+	return 0;
+}
+
+
+/*
+ * Send a disassociation frame carrying @reason and mark the interface as
+ * disassociated. Returns -EINVAL for a non-STA interface, -1 when the
+ * interface is not associated, and 0 on success.
+ */
+int ieee80211_sta_disassociate(struct net_device *dev, u16 reason)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+
+	printk(KERN_DEBUG "%s: disassociate(reason=%d)\n",
+	       dev->name, reason);
+
+	if (sdata->type != IEEE80211_IF_TYPE_STA)
+		return -EINVAL;
+	if (!ifsta->associated)
+		return -1;
+
+	ieee80211_send_disassoc(dev, ifsta, reason);
+	ieee80211_set_disassoc(dev, ifsta, 0);
+
+	return 0;
+}
diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
new file mode 100644
index 000000000000..0f844f7895f1
--- /dev/null
+++ b/net/mac80211/michael.c
@@ -0,0 +1,104 @@
+/*
+ * Michael MIC implementation - optimized for TKIP MIC operations
+ * Copyright 2002-2003, Instant802 Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+
+#include "michael.h"
+
+/* Rotate the 32-bit value @val right by @bits positions (0 < @bits < 32). */
+static inline u32 rotr(u32 val, int bits)
+{
+	const u32 low = val >> bits;
+	const u32 high = val << (32 - bits);
+
+	return low | high;
+}
+
+
+/* Rotate the 32-bit value @val left by @bits positions (0 < @bits < 32). */
+static inline u32 rotl(u32 val, int bits)
+{
+	const u32 high = val << bits;
+	const u32 low = val >> (32 - bits);
+
+	return high | low;
+}
+
+
+/* Swap the two bytes inside each 16-bit half of @val (0xAABBCCDD ->
+ * 0xBBAADDCC). */
+static inline u32 xswap(u32 val)
+{
+	const u32 odd_bytes = val & 0xff00ff00;
+	const u32 even_bytes = val & 0x00ff00ff;
+
+	return (odd_bytes >> 8) | (even_bytes << 8);
+}
+
+
+#define michael_block(l, r) /* one Michael mixing round; l and r are modifiable lvalues updated in place */ \
+do { \
+ r ^= rotl(l, 17); \
+ l += r; \
+ r ^= xswap(l); \
+ l += r; \
+ r ^= rotl(l, 3); \
+ l += r; \
+ r ^= rotr(l, 2); \
+ l += r; \
+} while (0)
+
+
+/*
+ * Read a 32-bit little-endian value from the four bytes at @data.
+ *
+ * The most significant byte is widened to u32 before shifting: a u8 is
+ * promoted to (signed) int, and shifting a value >= 0x80 left by 24 would
+ * move a one into the sign bit.
+ */
+static inline u32 michael_get32(u8 *data)
+{
+	return data[0] | (data[1] << 8) | (data[2] << 16) |
+	       ((u32) data[3] << 24);
+}
+
+
+/* Store @val at @data as four bytes, least significant byte first. */
+static inline void michael_put32(u32 val, u8 *data)
+{
+	int i;
+
+	for (i = 0; i < 4; i++)
+		data[i] = (val >> (8 * i)) & 0xff;
+}
+
+
+/*
+ * Compute the Michael MIC over a pseudo header and a data buffer.
+ *
+ * @key:      8-byte MIC key
+ * @da:       6-byte destination address
+ * @sa:       6-byte source address
+ * @priority: priority byte of the pseudo header
+ * @data:     payload to authenticate
+ * @data_len: number of payload bytes
+ * @mic:      output buffer, receives the 8-byte MIC
+ */
+void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
+		 u8 *data, size_t data_len, u8 *mic)
+{
+	u32 l, r, val;
+	size_t block, blocks, left;
+
+	l = michael_get32(key);
+	r = michael_get32(key + 4);
+
+	/* A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC
+	 * calculation, but it is _not_ transmitted */
+	l ^= michael_get32(da);
+	michael_block(l, r);
+	/* Widen before the 24-bit shift: sa[1] is promoted to int and a byte
+	 * >= 0x80 would otherwise be shifted into the sign bit. */
+	l ^= da[4] | (da[5] << 8) | (sa[0] << 16) | ((u32) sa[1] << 24);
+	michael_block(l, r);
+	l ^= michael_get32(&sa[2]);
+	michael_block(l, r);
+	l ^= priority;
+	michael_block(l, r);
+
+	/* Real data */
+	blocks = data_len / 4;
+	left = data_len % 4;
+
+	for (block = 0; block < blocks; block++) {
+		l ^= michael_get32(&data[block * 4]);
+		michael_block(l, r);
+	}
+
+	/* Partial block of 0..3 bytes followed by the 0x5a padding byte.
+	 * The word is assembled from the last byte backwards, so the data
+	 * bytes end up in little-endian order below the 0x5a marker and the
+	 * remaining high bytes stay zero. */
+	val = 0x5a;
+	while (left > 0) {
+		val <<= 8;
+		left--;
+		val |= data[blocks * 4 + left];
+	}
+	l ^= val;
+	michael_block(l, r);
+	/* last block is zero, so l ^ 0 = l */
+	michael_block(l, r);
+
+	michael_put32(l, mic);
+	michael_put32(r, mic + 4);
+}
diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h
new file mode 100644
index 000000000000..2e6aebabeea1
--- /dev/null
+++ b/net/mac80211/michael.h
@@ -0,0 +1,20 @@
+/*
+ * Michael MIC implementation - optimized for TKIP MIC operations
+ * Copyright 2002-2003, Instant802 Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MICHAEL_H
+#define MICHAEL_H
+
+#include <linux/types.h>
+
+#define MICHAEL_MIC_LEN 8
+
+void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
+ u8 *data, size_t data_len, u8 *mic); /* computes the Michael MIC of data (data_len bytes) plus the DA/SA/priority pseudo header; key is 8 bytes, da and sa are 6 bytes each, mic receives MICHAEL_MIC_LEN bytes */
+
+#endif /* MICHAEL_H */
diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c
new file mode 100644
index 000000000000..2048cfd1ca70
--- /dev/null
+++ b/net/mac80211/rc80211_simple.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/compiler.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "ieee80211_rate.h"
+#include "debugfs.h"
+
+
+/* This is a minimal implementation of TX rate controlling that can be used
+ * as the default when no improved mechanisms are available. */
+
+
+#define RATE_CONTROL_EMERG_DEC 2
+#define RATE_CONTROL_INTERVAL (HZ / 20)
+#define RATE_CONTROL_MIN_TX 10
+
+MODULE_ALIAS("rc80211_default");
+
+/*
+ * Move @sta to the next higher TX rate that the station supports, that the
+ * operating mode marks as supported and that does not exceed the BSS rate
+ * control limit (if one is set). The rate is left untouched when a forced
+ * unicast rate is configured or no higher usable rate exists.
+ */
+static void rate_control_rate_inc(struct ieee80211_local *local,
+				  struct sta_info *sta)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_hw_mode *mode;
+	int i = sta->txrate;
+	int maxrate;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
+	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) {
+		/* forced unicast rate - do not change STA rate */
+		return;
+	}
+
+	mode = local->oper_hw_mode;
+	maxrate = sdata->bss ? sdata->bss->max_ratectrl_rateidx : -1;
+
+	/* Any index at or past the end of the rate table is out of range
+	 * (the last valid index is num_rates - 1). Restart just below the
+	 * top so the loop can still settle on the highest rate; with ">" an
+	 * index equal to num_rates was never repaired. */
+	if (i >= mode->num_rates)
+		i = mode->num_rates - 2;
+
+	while (i + 1 < mode->num_rates) {
+		i++;
+		if (sta->supp_rates & BIT(i) &&
+		    mode->rates[i].flags & IEEE80211_RATE_SUPPORTED &&
+		    (maxrate < 0 || i <= maxrate)) {
+			sta->txrate = i;
+			break;
+		}
+	}
+}
+
+
+/*
+ * Move @sta to the next lower TX rate that both the station and the
+ * operating mode support. The rate is left untouched when a forced unicast
+ * rate is configured or no lower usable rate exists.
+ */
+static void rate_control_rate_dec(struct ieee80211_local *local,
+				  struct sta_info *sta)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
+	struct ieee80211_hw_mode *mode;
+	int idx;
+
+	/* forced unicast rate - do not change STA rate */
+	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
+		return;
+
+	mode = local->oper_hw_mode;
+
+	/* Start the search right below the current rate, or at the top of
+	 * the table if the current index lies beyond it. */
+	idx = sta->txrate;
+	if (idx > mode->num_rates)
+		idx = mode->num_rates;
+
+	for (idx--; idx >= 0; idx--) {
+		if (sta->supp_rates & BIT(idx) &&
+		    mode->rates[idx].flags & IEEE80211_RATE_SUPPORTED) {
+			sta->txrate = idx;
+			break;
+		}
+	}
+}
+
+
+/*
+ * Return the first rate of @mode that is flagged as supported. If no rate
+ * carries the flag, a debug message is logged and the first table entry is
+ * returned.
+ */
+static struct ieee80211_rate *
+rate_control_lowest_rate(struct ieee80211_local *local,
+			 struct ieee80211_hw_mode *mode)
+{
+	int idx = 0;
+
+	while (idx < mode->num_rates) {
+		if (mode->rates[idx].flags & IEEE80211_RATE_SUPPORTED)
+			return &mode->rates[idx];
+		idx++;
+	}
+
+	printk(KERN_DEBUG "rate_control_lowest_rate - no supported rates "
+	       "found\n");
+	return &mode->rates[0];
+}
+
+
+struct global_rate_control { /* per-device private data of this algorithm, allocated by rate_control_simple_alloc() */
+ int dummy; /* placeholder; not read or written anywhere in this file */
+};
+
+struct sta_rate_control { /* per-station private data of this algorithm, allocated by rate_control_simple_alloc_sta() */
+ unsigned long last_rate_change; /* jiffies of the last periodic rate evaluation */
+ u32 tx_num_failures; /* frames with excessive retries since the last evaluation */
+ u32 tx_num_xmit; /* TX status reports since the last evaluation */
+
+ unsigned long avg_rate_update; /* jiffies when the average-rate statistics were last reset */
+ u32 tx_avg_rate_sum; /* sum of the TX rates sampled at each evaluation */
+ u32 tx_avg_rate_num; /* number of samples in tx_avg_rate_sum */
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ struct dentry *tx_avg_rate_sum_dentry; /* debugfs file exposing tx_avg_rate_sum */
+ struct dentry *tx_avg_rate_num_dentry; /* debugfs file exposing tx_avg_rate_num */
+#endif
+};
+
+
+/*
+ * TX status callback of the "simple" rate control algorithm.
+ *
+ * Updates the per-station success/failure counters for the station the
+ * frame was addressed to, and
+ *  - once per RATE_CONTROL_INTERVAL (given more than RATE_CONTROL_MIN_TX
+ *    reports) raises or lowers the TX rate based on the percentage of
+ *    failed MPDUs;
+ *  - otherwise lowers the rate at once after RATE_CONTROL_EMERG_DEC
+ *    consecutive failures.
+ * About once a minute the average-rate statistics are (optionally logged
+ * and) reset. Frames to unknown stations are ignored.
+ */
+static void rate_control_simple_tx_status(void *priv, struct net_device *dev,
+					  struct sk_buff *skb,
+					  struct ieee80211_tx_status *status)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct sta_info *sta;
+	struct sta_rate_control *srctrl;
+
+	/* Takes a reference that is dropped at the end of the function. */
+	sta = sta_info_get(local, hdr->addr1);
+
+	if (!sta)
+		return;
+
+	srctrl = sta->rate_ctrl_priv;
+	srctrl->tx_num_xmit++;
+	if (status->excessive_retries) {
+		/* Delivery failed: flip both antenna selections between
+		 * 1 and 2 and account for the failure. */
+		sta->antenna_sel_tx = sta->antenna_sel_tx == 1 ? 2 : 1;
+		sta->antenna_sel_rx = sta->antenna_sel_rx == 1 ? 2 : 1;
+		if (local->sta_antenna_sel == STA_ANTENNA_SEL_SW_CTRL_DEBUG) {
+			printk(KERN_DEBUG "%s: " MAC_FMT " TX antenna --> %d "
+			       "RX antenna --> %d (@%lu)\n",
+			       dev->name, MAC_ARG(hdr->addr1),
+			       sta->antenna_sel_tx, sta->antenna_sel_rx, jiffies);
+		}
+		srctrl->tx_num_failures++;
+		sta->tx_retry_failed++;
+		sta->tx_num_consecutive_failures++;
+		sta->tx_num_mpdu_fail++;
+	} else {
+		/* Delivered: keep the last three ACK signal values. */
+		sta->last_ack_rssi[0] = sta->last_ack_rssi[1];
+		sta->last_ack_rssi[1] = sta->last_ack_rssi[2];
+		sta->last_ack_rssi[2] = status->ack_signal;
+		sta->tx_num_consecutive_failures = 0;
+		sta->tx_num_mpdu_ok++;
+	}
+	/* Every retry counts as one more failed MPDU. */
+	sta->tx_retry_count += status->retry_count;
+	sta->tx_num_mpdu_fail += status->retry_count;
+
+	if (time_after(jiffies,
+		       srctrl->last_rate_change + RATE_CONTROL_INTERVAL) &&
+		srctrl->tx_num_xmit > RATE_CONTROL_MIN_TX) {
+		u32 per_failed;
+		srctrl->last_rate_change = jiffies;
+
+		per_failed = (100 * sta->tx_num_mpdu_fail) /
+			(sta->tx_num_mpdu_fail + sta->tx_num_mpdu_ok);
+		/* TODO: calculate average per_failed to make adjusting
+		 * parameters easier */
+#if 0
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "MPDU fail=%d ok=%d per_failed=%d\n",
+			       sta->tx_num_mpdu_fail, sta->tx_num_mpdu_ok,
+			       per_failed);
+		}
+#endif
+
+		if (per_failed > local->rate_ctrl_num_down) {
+			rate_control_rate_dec(local, sta);
+		} else if (per_failed < local->rate_ctrl_num_up) {
+			rate_control_rate_inc(local, sta);
+		}
+		srctrl->tx_avg_rate_sum += status->control.rate->rate;
+		srctrl->tx_avg_rate_num++;
+		srctrl->tx_num_failures = 0;
+		srctrl->tx_num_xmit = 0;
+	} else if (sta->tx_num_consecutive_failures >=
+		   RATE_CONTROL_EMERG_DEC) {
+		rate_control_rate_dec(local, sta);
+	}
+
+	/* time_after() keeps the comparison correct across a jiffies
+	 * wrap-around, which a plain "<" on the raw counters does not. */
+	if (time_after(jiffies, srctrl->avg_rate_update + 60 * HZ)) {
+		srctrl->avg_rate_update = jiffies;
+		if (srctrl->tx_avg_rate_num > 0) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+			printk(KERN_DEBUG "%s: STA " MAC_FMT " Average rate: "
+			       "%d (%d/%d)\n",
+			       dev->name, MAC_ARG(sta->addr),
+			       srctrl->tx_avg_rate_sum /
+			       srctrl->tx_avg_rate_num,
+			       srctrl->tx_avg_rate_sum,
+			       srctrl->tx_avg_rate_num);
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+			srctrl->tx_avg_rate_sum = 0;
+			srctrl->tx_avg_rate_num = 0;
+		}
+	}
+
+	sta_info_put(sta);
+}
+
+
+/*
+ * Select the TX rate for @skb.
+ *
+ * Non-data frames, group-addressed frames and frames to unknown stations
+ * use the lowest supported rate. For a known station its current rate
+ * (or the forced unicast rate of the BSS, if set) is used, clamped to the
+ * rate table; extra->nonerp is pointed at the closest rate at or below it
+ * that is non-ERP, supported by the mode and supported by the station.
+ */
+static struct ieee80211_rate *
+rate_control_simple_get_rate(void *priv, struct net_device *dev,
+			     struct sk_buff *skb,
+			     struct rate_control_extra *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	/* extra->mode has to be read before @extra is cleared below. */
+	struct ieee80211_hw_mode *mode = extra->mode;
+	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta;
+	int rateidx, nonerp_idx;
+	u16 fc;
+
+	memset(extra, 0, sizeof(*extra));
+
+	fc = le16_to_cpu(hdr->frame_control);
+
+	/* Send management frames and broadcast/multicast data using
+	 * lowest rate. */
+	/* TODO: this could probably be improved.. */
+	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
+		return rate_control_lowest_rate(local, mode);
+	if (hdr->addr1[0] & 0x01)
+		return rate_control_lowest_rate(local, mode);
+
+	sta = sta_info_get(local, hdr->addr1);
+	if (!sta)
+		return rate_control_lowest_rate(local, mode);
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
+		sta->txrate = sdata->bss->force_unicast_rateidx;
+
+	rateidx = sta->txrate;
+	if (rateidx >= mode->num_rates)
+		rateidx = mode->num_rates - 1;
+	sta->last_txrate = rateidx;
+
+	/* Walk down from the chosen rate until a usable non-ERP rate is
+	 * found; index 0 is accepted unconditionally. */
+	for (nonerp_idx = rateidx; nonerp_idx > 0; nonerp_idx--) {
+		u32 flags = mode->rates[nonerp_idx].flags;
+
+		if (!(flags & IEEE80211_RATE_ERP) &&
+		    (flags & IEEE80211_RATE_SUPPORTED) &&
+		    (sta->supp_rates & BIT(nonerp_idx)))
+			break;
+	}
+	extra->nonerp = &mode->rates[nonerp_idx];
+
+	sta_info_put(sta);
+
+	return &mode->rates[rateidx];
+}
+
+
+/*
+ * Pick the initial TX rate of @sta: the highest index that both the station
+ * and the operating mode support, or 0 if there is none.
+ */
+static void rate_control_simple_rate_init(void *priv, void *priv_sta,
+					  struct ieee80211_local *local,
+					  struct sta_info *sta)
+{
+	struct ieee80211_hw_mode *mode = local->oper_hw_mode;
+	int idx;
+
+	sta->txrate = 0;
+
+	/* TODO: what is a good starting rate for STA? About middle? Maybe not
+	 * the lowest or the highest rate.. Could consider using RSSI from
+	 * previous packets? Need to have IEEE 802.1X auth succeed immediately
+	 * after assoc.. */
+	for (idx = mode->num_rates - 1; idx >= 0; idx--) {
+		if ((sta->supp_rates & BIT(idx)) &&
+		    (mode->rates[idx].flags & IEEE80211_RATE_SUPPORTED)) {
+			sta->txrate = idx;
+			break;
+		}
+	}
+}
+
+
+/* Allocate the zeroed per-device state of the algorithm; NULL on failure. */
+static void * rate_control_simple_alloc(struct ieee80211_local *local)
+{
+	return kzalloc(sizeof(struct global_rate_control), GFP_ATOMIC);
+}
+
+
+/* Release the per-device state obtained from rate_control_simple_alloc(). */
+static void rate_control_simple_free(void *priv)
+{
+	kfree(priv);
+}
+
+
+/* The "clear" operation is a no-op for this algorithm. */
+static void rate_control_simple_clear(void *priv)
+{
+	/* nothing to do */
+}
+
+
+/* Allocate the zeroed per-station state using the caller's @gfp flags;
+ * NULL on failure. */
+static void * rate_control_simple_alloc_sta(void *priv, gfp_t gfp)
+{
+	return kzalloc(sizeof(struct sta_rate_control), gfp);
+}
+
+
+/* Release the per-station state obtained from
+ * rate_control_simple_alloc_sta(). */
+static void rate_control_simple_free_sta(void *priv, void *priv_sta)
+{
+	kfree(priv_sta);
+}
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+
+/* debugfs open handler: hand the inode's private pointer to the file so the
+ * read handlers can reach it through file->private_data. */
+static int open_file_generic(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	file->private_data = priv;
+	return 0;
+}
+
+/*
+ * debugfs read handler: report tx_avg_rate_sum of the station as a decimal
+ * number followed by a newline.
+ */
+static ssize_t sta_tx_avg_rate_sum_read(struct file *file,
+					char __user *userbuf,
+					size_t count, loff_t *ppos)
+{
+	struct sta_rate_control *srctrl = file->private_data;
+	char buf[20];
+	int len;
+
+	/* The counter is a u32, so it is printed unsigned ("%d" shows large
+	 * values as negative); scnprintf() bounds the write and returns the
+	 * number of characters actually stored. */
+	len = scnprintf(buf, sizeof(buf), "%u\n", srctrl->tx_avg_rate_sum);
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+
+static const struct file_operations sta_tx_avg_rate_sum_ops = { /* file operations of the "rc_simple_sta_tx_avg_rate_sum" debugfs file */
+ .read = sta_tx_avg_rate_sum_read,
+ .open = open_file_generic,
+};
+
+/*
+ * debugfs read handler: report tx_avg_rate_num of the station as a decimal
+ * number followed by a newline.
+ */
+static ssize_t sta_tx_avg_rate_num_read(struct file *file,
+					char __user *userbuf,
+					size_t count, loff_t *ppos)
+{
+	struct sta_rate_control *srctrl = file->private_data;
+	char buf[20];
+	int len;
+
+	/* The counter is a u32, so it is printed unsigned ("%d" shows large
+	 * values as negative); scnprintf() bounds the write and returns the
+	 * number of characters actually stored. */
+	len = scnprintf(buf, sizeof(buf), "%u\n", srctrl->tx_avg_rate_num);
+	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
+}
+
+static const struct file_operations sta_tx_avg_rate_num_ops = { /* file operations of the "rc_simple_sta_tx_avg_rate_num" debugfs file */
+ .read = sta_tx_avg_rate_num_read,
+ .open = open_file_generic,
+};
+
+/*
+ * Create the two read-only (0400) per-station debugfs files below @dir and
+ * remember their dentries in the per-station state for later removal.
+ */
+static void rate_control_simple_add_sta_debugfs(void *priv, void *priv_sta,
+						struct dentry *dir)
+{
+	struct sta_rate_control *srctrl = priv_sta;
+	struct dentry *entry;
+
+	entry = debugfs_create_file("rc_simple_sta_tx_avg_rate_num", 0400,
+				    dir, srctrl, &sta_tx_avg_rate_num_ops);
+	srctrl->tx_avg_rate_num_dentry = entry;
+
+	entry = debugfs_create_file("rc_simple_sta_tx_avg_rate_sum", 0400,
+				    dir, srctrl, &sta_tx_avg_rate_sum_ops);
+	srctrl->tx_avg_rate_sum_dentry = entry;
+}
+
+/* Remove the per-station debugfs files created by
+ * rate_control_simple_add_sta_debugfs(). */
+static void rate_control_simple_remove_sta_debugfs(void *priv, void *priv_sta)
+{
+	struct sta_rate_control *sta_priv = priv_sta;
+
+	debugfs_remove(sta_priv->tx_avg_rate_sum_dentry);
+	debugfs_remove(sta_priv->tx_avg_rate_num_dentry);
+}
+#endif
+
+static struct rate_control_ops rate_control_simple = { /* operations table registered under the name "simple" by rate_control_simple_init() */
+ .module = THIS_MODULE,
+ .name = "simple",
+ .tx_status = rate_control_simple_tx_status, /* per-frame TX feedback; adjusts the station rate */
+ .get_rate = rate_control_simple_get_rate, /* rate selection for an outgoing frame */
+ .rate_init = rate_control_simple_rate_init, /* initial rate of a station */
+ .clear = rate_control_simple_clear, /* no-op */
+ .alloc = rate_control_simple_alloc, /* per-device state */
+ .free = rate_control_simple_free,
+ .alloc_sta = rate_control_simple_alloc_sta, /* per-station state */
+ .free_sta = rate_control_simple_free_sta,
+#ifdef CONFIG_MAC80211_DEBUGFS
+ .add_sta_debugfs = rate_control_simple_add_sta_debugfs,
+ .remove_sta_debugfs = rate_control_simple_remove_sta_debugfs,
+#endif
+};
+
+
+/* Module init: register the "simple" algorithm with the rate control
+ * framework and pass on the registration result. */
+static int __init rate_control_simple_init(void)
+{
+	int ret = ieee80211_rate_control_register(&rate_control_simple);
+
+	return ret;
+}
+
+
+/* Module exit: unregister the "simple" algorithm again. */
+static void __exit rate_control_simple_exit(void)
+{
+	struct rate_control_ops *ops = &rate_control_simple;
+
+	ieee80211_rate_control_unregister(ops);
+}
+
+
+module_init(rate_control_simple_init);
+module_exit(rate_control_simple_exit);
+
+MODULE_DESCRIPTION("Simple rate control algorithm for ieee80211");
+MODULE_LICENSE("GPL");
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
new file mode 100644
index 000000000000..ab7b1f067c6e
--- /dev/null
+++ b/net/mac80211/sta_info.c
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "ieee80211_rate.h"
+#include "sta_info.h"
+#include "debugfs_key.h"
+#include "debugfs_sta.h"
+
+/* Insert @sta at the head of the hash chain selected by its address.
+ * Caller must hold local->sta_lock */
+static void sta_info_hash_add(struct ieee80211_local *local,
+			      struct sta_info *sta)
+{
+	struct sta_info **bucket = &local->sta_hash[STA_HASH(sta->addr)];
+
+	sta->hnext = *bucket;
+	*bucket = sta;
+}
+
+
+/* Unlink the first entry whose address equals sta->addr from its hash
+ * chain. An empty chain is ignored silently; a non-empty chain without a
+ * matching entry is reported with KERN_ERR.
+ * Caller must hold local->sta_lock */
+static void sta_info_hash_del(struct ieee80211_local *local,
+			      struct sta_info *sta)
+{
+	struct sta_info **link = &local->sta_hash[STA_HASH(sta->addr)];
+
+	if (!*link)
+		return;
+
+	/* walk the links rather than the entries so that the head of the
+	 * chain needs no special case */
+	for (; *link; link = &(*link)->hnext) {
+		if (memcmp((*link)->addr, sta->addr, ETH_ALEN) == 0) {
+			*link = (*link)->hnext;
+			return;
+		}
+	}
+
+	printk(KERN_ERR "%s: could not remove STA " MAC_FMT " from "
+	       "hash table\n", local->mdev->name, MAC_ARG(sta->addr));
+}
+
+/* Take an additional reference on @sta; it is dropped again with
+ * sta_info_put(). */
+static inline void __sta_info_get(struct sta_info *sta)
+{
+ kref_get(&sta->kref);
+}
+
+/* Look up the STA entry with address @addr (ETH_ALEN octets). On success a
+ * reference is taken for the caller, who has to release it with
+ * sta_info_put(). Returns NULL if no entry matches. */
+struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr)
+{
+	struct sta_info *sta;
+
+	spin_lock_bh(&local->sta_lock);
+	for (sta = local->sta_hash[STA_HASH(addr)]; sta; sta = sta->hnext) {
+		if (memcmp(sta->addr, addr, ETH_ALEN) == 0) {
+			__sta_info_get(sta);
+			break;
+		}
+	}
+	spin_unlock_bh(&local->sta_lock);
+
+	return sta;
+}
+EXPORT_SYMBOL(sta_info_get);
+
+/* Return the .rate value of the entry in local->oper_hw_mode->rates[] that
+ * is selected by the smallest txrate index among all stations in the hash
+ * table. Index 0 is used when the table holds no station. */
+int sta_info_min_txrate_get(struct ieee80211_local *local)
+{
+	struct ieee80211_hw_mode *mode;
+	struct sta_info *sta;
+	int lowest = 9999999;	/* sentinel: no station seen yet */
+	int bucket;
+
+	spin_lock_bh(&local->sta_lock);
+	mode = local->oper_hw_mode;
+	for (bucket = 0; bucket < STA_HASH_SIZE; bucket++) {
+		for (sta = local->sta_hash[bucket]; sta; sta = sta->hnext) {
+			if (sta->txrate < lowest)
+				lowest = sta->txrate;
+		}
+	}
+	spin_unlock_bh(&local->sta_lock);
+
+	if (lowest == 9999999)
+		lowest = 0;
+
+	return mode->rates[lowest].rate;
+}
+
+
+/* kref release callback (see sta_info_put()): runs when the last reference
+ * to a STA entry is dropped and frees the entry together with the frames and
+ * rate control state it still owns. */
+static void sta_info_release(struct kref *kref)
+{
+ struct sta_info *sta = container_of(kref, struct sta_info, kref);
+ struct ieee80211_local *local = sta->local;
+ struct sk_buff *skb;
+
+ /* free sta structure; it has already been removed from
+ * hash table etc. external structures. Make sure that all
+ * buffered frames are released (one might have been added
+ * after sta_info_free() was called). */
+ while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) {
+ local->total_ps_buffered--;
+ dev_kfree_skb_any(skb);
+ }
+ while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) {
+ dev_kfree_skb_any(skb);
+ }
+ /* give back the per-STA rate control data and the reference on the
+ * rate control algorithm that sta_info_add() obtained */
+ rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv);
+ rate_control_put(sta->rate_ctrl);
+ if (sta->key)
+ ieee80211_debugfs_key_sta_del(sta->key, sta);
+ kfree(sta);
+}
+
+
+/* Drop one reference on @sta. When the last reference goes away the entry
+ * is freed through sta_info_release(), so @sta must not be used afterwards
+ * unless the caller holds another reference. */
+void sta_info_put(struct sta_info *sta)
+{
+ kref_put(&sta->kref, sta_info_release);
+}
+EXPORT_SYMBOL(sta_info_put);
+
+
+/* Allocate a STA entry for @addr (ETH_ALEN octets) on @dev, attach per-STA
+ * rate control data and publish the entry in local->sta_list and the
+ * address hash. @gfp is used for all allocations.
+ *
+ * Returns the new entry with two references - the initial one, which
+ * sta_info_free() eventually drops, and one for the caller, who has to
+ * release it with sta_info_put() - or NULL if an allocation failed.
+ */
+struct sta_info * sta_info_add(struct ieee80211_local *local,
+			       struct net_device *dev, u8 *addr, gfp_t gfp)
+{
+	struct sta_info *sta;
+
+	sta = kzalloc(sizeof(*sta), gfp);
+	if (!sta)
+		return NULL;
+
+	kref_init(&sta->kref);
+
+	sta->rate_ctrl = rate_control_get(local->rate_ctrl);
+	sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, gfp);
+	if (!sta->rate_ctrl_priv) {
+		/* Nobody else can see this entry yet, so undo by hand. Do
+		 * not drop the kref here: sta_info_release() would put the
+		 * rate control reference and free the entry a second time,
+		 * and it would walk the skb queues before they have been
+		 * initialized. */
+		rate_control_put(sta->rate_ctrl);
+		kfree(sta);
+		return NULL;
+	}
+
+	memcpy(sta->addr, addr, ETH_ALEN);
+	sta->local = local;
+	sta->dev = dev;
+	skb_queue_head_init(&sta->ps_tx_buf);
+	skb_queue_head_init(&sta->tx_filtered);
+	/* Has to be valid before the entry becomes visible in the table:
+	 * sta_info_free() acts on every value other than
+	 * HW_KEY_IDX_INVALID. */
+	sta->key_idx_compression = HW_KEY_IDX_INVALID;
+	__sta_info_get(sta);	/* sta used by caller, decremented by
+				 * sta_info_put() */
+	spin_lock_bh(&local->sta_lock);
+	list_add(&sta->list, &local->sta_list);
+	local->num_sta++;
+	sta_info_hash_add(local, sta);
+	spin_unlock_bh(&local->sta_lock);
+	if (local->ops->sta_table_notification)
+		local->ops->sta_table_notification(local_to_hw(local),
+						  local->num_sta);
+
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+	printk(KERN_DEBUG "%s: Added STA " MAC_FMT "\n",
+	       local->mdev->name, MAC_ARG(addr));
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	if (!in_interrupt()) {
+		sta->debugfs_registered = 1;
+		ieee80211_sta_debugfs_add(sta);
+		rate_control_add_sta_debugfs(sta);
+	} else {
+		/* debugfs entry adding might sleep, so schedule process
+		 * context task for adding entry for STAs that do not yet
+		 * have one. */
+		queue_work(local->hw.workqueue, &local->sta_debugfs_add);
+	}
+#endif
+
+	return sta;
+}
+
+/* Last step of removing a STA: free its key, remove its debugfs entries and
+ * drop one reference with sta_info_put(). Called from sta_info_free()
+ * directly, or from the sta_debugfs_add work item for entries that
+ * sta_info_free() parked on local->deleted_sta_list. */
+static void finish_sta_info_free(struct ieee80211_local *local,
+ struct sta_info *sta)
+{
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+ printk(KERN_DEBUG "%s: Removed STA " MAC_FMT "\n",
+ local->mdev->name, MAC_ARG(sta->addr));
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+
+ if (sta->key) {
+ ieee80211_debugfs_key_remove(sta->key);
+ ieee80211_key_free(sta->key);
+ /* cleared so that sta_info_release() does not touch the freed key */
+ sta->key = NULL;
+ }
+
+ rate_control_remove_sta_debugfs(sta);
+ ieee80211_sta_debugfs_remove(sta);
+
+ sta_info_put(sta);
+}
+
+/* Unlink @sta from the address hash and from the list it is on, undo its
+ * power save accounting, decrement local->num_sta and clear its AID/TIM
+ * state. Nothing is freed here. sta_info_free() calls this with
+ * local->sta_lock held (taken there, or by its caller when @locked is
+ * set). */
+static void sta_info_remove(struct sta_info *sta)
+{
+ struct ieee80211_local *local = sta->local;
+ struct ieee80211_sub_if_data *sdata;
+
+ sta_info_hash_del(local, sta);
+ list_del(&sta->list);
+ sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
+ if (sta->flags & WLAN_STA_PS) {
+ sta->flags &= ~WLAN_STA_PS;
+ /* the station no longer counts as sleeping in its BSS */
+ if (sdata->bss)
+ atomic_dec(&sdata->bss->num_sta_ps);
+ }
+ local->num_sta--;
+ sta_info_remove_aid_ptr(sta);
+}
+
+/* Remove @sta from the STA table and start tearing it down.
+ *
+ * @locked: non-zero if the caller already holds local->sta_lock, zero to
+ *	have the lock taken here around the table manipulation.
+ *
+ * Frames still buffered for the station are dropped and, if the driver
+ * implements set_key, it is told to disable the station's key or the key
+ * index reserved in sta->key_idx_compression. The remaining work is done by
+ * finish_sta_info_free(); with CONFIG_MAC80211_DEBUGFS and in atomic context
+ * it is deferred to the sta_debugfs_add work item through
+ * local->deleted_sta_list.
+ */
+void sta_info_free(struct sta_info *sta, int locked)
+{
+	struct sk_buff *skb;
+	struct ieee80211_local *local = sta->local;
+
+	if (!locked) {
+		spin_lock_bh(&local->sta_lock);
+		sta_info_remove(sta);
+		spin_unlock_bh(&local->sta_lock);
+	} else {
+		sta_info_remove(sta);
+	}
+	if (local->ops->sta_table_notification)
+		local->ops->sta_table_notification(local_to_hw(local),
+						  local->num_sta);
+
+	while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) {
+		local->total_ps_buffered--;
+		dev_kfree_skb_any(skb);
+	}
+	while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) {
+		dev_kfree_skb_any(skb);
+	}
+
+	if (sta->key) {
+		if (local->ops->set_key) {
+			struct ieee80211_key_conf *key;
+			key = ieee80211_key_data2conf(local, sta->key);
+			if (key) {
+				local->ops->set_key(local_to_hw(local),
+						    DISABLE_KEY,
+						    sta->addr, key, sta->aid);
+				kfree(key);
+			}
+		}
+	} else if (sta->key_idx_compression != HW_KEY_IDX_INVALID) {
+		/* set_key is an optional driver callback here as well */
+		if (local->ops->set_key) {
+			struct ieee80211_key_conf conf;
+			memset(&conf, 0, sizeof(conf));
+			conf.hw_key_idx = sta->key_idx_compression;
+			conf.alg = ALG_NULL;
+			conf.flags |= IEEE80211_KEY_FORCE_SW_ENCRYPT;
+			local->ops->set_key(local_to_hw(local), DISABLE_KEY,
+					    sta->addr, &conf, sta->aid);
+		}
+		sta->key_idx_compression = HW_KEY_IDX_INVALID;
+	}
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	if (in_atomic()) {
+		/* The work item takes entries off deleted_sta_list under
+		 * sta_lock, so the insertion must be done under that lock
+		 * too. */
+		if (!locked)
+			spin_lock_bh(&local->sta_lock);
+		list_add(&sta->list, &local->deleted_sta_list);
+		if (!locked)
+			spin_unlock_bh(&local->sta_lock);
+		queue_work(local->hw.workqueue, &local->sta_debugfs_add);
+	} else
+#endif
+		finish_sta_info_free(local, sta);
+}
+
+
+/* Return non-zero if the buffered frame @skb of @sta is older than the
+ * expiry timeout, and 0 if it is not or if @skb is NULL. The time the frame
+ * was queued is read from pkt_data->jiffies in the skb control buffer. */
+static inline int sta_info_buffer_expired(struct ieee80211_local *local,
+ struct sta_info *sta,
+ struct sk_buff *skb)
+{
+ struct ieee80211_tx_packet_data *pkt_data;
+ int timeout;
+
+ if (!skb)
+ return 0;
+
+ pkt_data = (struct ieee80211_tx_packet_data *) skb->cb;
+
+ /* Timeout: (2 * listen_interval * beacon_int * 1024 / 1000000) sec;
+ * 2 * 1024 / 1000000 is written in reduced form as 32 / 15625 and the
+ * integer division is done before the multiplication by HZ */
+ timeout = (sta->listen_interval * local->hw.conf.beacon_int * 32 /
+ 15625) * HZ;
+ /* never expire frames earlier than the fixed minimum */
+ if (timeout < STA_TX_BUFFER_EXPIRE)
+ timeout = STA_TX_BUFFER_EXPIRE;
+ return time_after(jiffies, pkt_data->jiffies + timeout);
+}
+
+
+/* Drop expired frames from the head of the power save buffer of @sta. The
+ * loop stops at the first frame that has not expired yet or when the queue
+ * is empty. */
+static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
+ struct sta_info *sta)
+{
+ unsigned long flags;
+ struct sk_buff *skb;
+
+ if (skb_queue_empty(&sta->ps_tx_buf))
+ return;
+
+ for (;;) {
+ /* peek and dequeue under the queue lock so that the frame that
+ * was tested is the one that gets removed */
+ spin_lock_irqsave(&sta->ps_tx_buf.lock, flags);
+ skb = skb_peek(&sta->ps_tx_buf);
+ if (sta_info_buffer_expired(local, sta, skb)) {
+ skb = __skb_dequeue(&sta->ps_tx_buf);
+ /* last buffered frame gone: clear the TIM flag */
+ if (skb_queue_empty(&sta->ps_tx_buf))
+ sta->flags &= ~WLAN_STA_TIM;
+ } else
+ skb = NULL;
+ spin_unlock_irqrestore(&sta->ps_tx_buf.lock, flags);
+
+ if (skb) {
+ local->total_ps_buffered--;
+ printk(KERN_DEBUG "Buffered frame expired (STA "
+ MAC_FMT ")\n", MAC_ARG(sta->addr));
+ dev_kfree_skb(skb);
+ } else
+ break;
+ }
+}
+
+
+/* Handler of the local->sta_cleanup timer (installed by sta_info_init()):
+ * expire old buffered frames of every STA on local->sta_list and re-arm the
+ * timer STA_INFO_CLEANUP_INTERVAL from now. @data is the struct
+ * ieee80211_local pointer cast to unsigned long. */
+static void sta_info_cleanup(unsigned long data)
+{
+ struct ieee80211_local *local = (struct ieee80211_local *) data;
+ struct sta_info *sta;
+
+ spin_lock_bh(&local->sta_lock);
+ list_for_each_entry(sta, &local->sta_list, list) {
+ /* hold a reference while working on the entry */
+ __sta_info_get(sta);
+ sta_info_cleanup_expire_buffered(local, sta);
+ sta_info_put(sta);
+ }
+ spin_unlock_bh(&local->sta_lock);
+
+ /* the timer re-arms itself; sta_info_stop() deletes it */
+ local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL;
+ add_timer(&local->sta_cleanup);
+}
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+/* Work item (local->sta_debugfs_add) that performs the debugfs related work
+ * which sta_info_add() and sta_info_free() defer:
+ * 1. finish the removal of every entry on local->deleted_sta_list;
+ * 2. register debugfs entries for every STA on local->sta_list that has
+ * debugfs_registered still unset.
+ * Both loops pick one entry at a time under local->sta_lock and process it
+ * with the lock released. */
+static void sta_info_debugfs_add_task(struct work_struct *work)
+{
+ struct ieee80211_local *local =
+ container_of(work, struct ieee80211_local, sta_debugfs_add);
+ struct sta_info *sta, *tmp;
+
+ while (1) {
+ spin_lock_bh(&local->sta_lock);
+ if (!list_empty(&local->deleted_sta_list)) {
+ sta = list_entry(local->deleted_sta_list.next,
+ struct sta_info, list);
+ list_del(local->deleted_sta_list.next);
+ } else
+ sta = NULL;
+ spin_unlock_bh(&local->sta_lock);
+ if (!sta)
+ break;
+ finish_sta_info_free(local, sta);
+ }
+
+ while (1) {
+ sta = NULL;
+ spin_lock_bh(&local->sta_lock);
+ list_for_each_entry(tmp, &local->sta_list, list) {
+ if (!tmp->debugfs_registered) {
+ sta = tmp;
+ /* keep the entry alive once the lock is dropped */
+ __sta_info_get(sta);
+ break;
+ }
+ }
+ spin_unlock_bh(&local->sta_lock);
+
+ if (!sta)
+ break;
+
+ /* mark first so that the next scan skips this entry */
+ sta->debugfs_registered = 1;
+ ieee80211_sta_debugfs_add(sta);
+ rate_control_add_sta_debugfs(sta);
+ sta_info_put(sta);
+ }
+}
+#endif
+
+/* Initialize the STA table state in @local: the lock, both lists, the
+ * cleanup timer and, with CONFIG_MAC80211_DEBUGFS, the debugfs work item.
+ * The timer is only set up here; sta_info_start() arms it. */
+void sta_info_init(struct ieee80211_local *local)
+{
+ spin_lock_init(&local->sta_lock);
+ INIT_LIST_HEAD(&local->sta_list);
+ INIT_LIST_HEAD(&local->deleted_sta_list);
+
+ init_timer(&local->sta_cleanup);
+ local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL;
+ /* the handler gets @local back through the timer's data word */
+ local->sta_cleanup.data = (unsigned long) local;
+ local->sta_cleanup.function = sta_info_cleanup;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ INIT_WORK(&local->sta_debugfs_add, sta_info_debugfs_add_task);
+#endif
+}
+
+/* Arm the cleanup timer that sta_info_init() set up. Always returns 0. */
+int sta_info_start(struct ieee80211_local *local)
+{
+ add_timer(&local->sta_cleanup);
+ return 0;
+}
+
+/* Stop the periodic cleanup timer and free every entry on
+ * local->sta_list. */
+void sta_info_stop(struct ieee80211_local *local)
+{
+	struct sta_info *sta, *tmp;
+
+	/* sta_info_cleanup() re-arms the timer from inside the handler. A
+	 * plain del_timer() could return while the handler is running on
+	 * another CPU and about to queue the timer again, so wait for it. */
+	del_timer_sync(&local->sta_cleanup);
+
+	list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
+		/* sta_info_free must be called with 0 as the last
+		 * parameter to ensure all debugfs sta entries are
+		 * unregistered. We don't need locking at this
+		 * point. */
+		sta_info_free(sta, 0);
+	}
+}
+
+/* Clear the TIM state kept for the AID of @sta, both in the driver (if it
+ * implements set_tim) and in the BSS data of the station's interface.
+ * Nothing is done for a station whose AID is 0. */
+void sta_info_remove_aid_ptr(struct sta_info *sta)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_local *local;
+
+	if (sta->aid == 0)
+		return;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
+	local = sdata->local;
+
+	if (local->ops->set_tim)
+		local->ops->set_tim(local_to_hw(local), sta->aid, 0);
+	if (sdata->bss)
+		__bss_tim_clear(sdata->bss, sta->aid);
+}
+
+
+/**
+ * sta_info_flush - flush matching STA entries from the STA table
+ * @local: local interface data
+ * @dev: matching rule for the net device (sta->dev) or %NULL to match all STAs
+ *
+ * The whole walk runs with local->sta_lock held, which is why
+ * sta_info_free() is called with its locked argument set.
+ */
+void sta_info_flush(struct ieee80211_local *local, struct net_device *dev)
+{
+ struct sta_info *sta, *tmp;
+
+ spin_lock_bh(&local->sta_lock);
+ /* _safe variant: sta_info_free() unlinks the current entry */
+ list_for_each_entry_safe(sta, tmp, &local->sta_list, list)
+ if (!dev || dev == sta->dev)
+ sta_info_free(sta, 1);
+ spin_unlock_bh(&local->sta_lock);
+}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
new file mode 100644
index 000000000000..b5591d2f60a4
--- /dev/null
+++ b/net/mac80211/sta_info.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2002-2005, Devicescape Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef STA_INFO_H
+#define STA_INFO_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/kref.h>
+#include "ieee80211_key.h"
+
+/* Stations flags (struct sta_info::flags) */
+#define WLAN_STA_AUTH BIT(0)
+#define WLAN_STA_ASSOC BIT(1)
+#define WLAN_STA_PS BIT(2)
+#define WLAN_STA_TIM BIT(3) /* TIM bit is on for PS stations */
+#define WLAN_STA_PERM BIT(4) /* permanent; do not remove entry on expiration */
+#define WLAN_STA_AUTHORIZED BIT(5) /* If 802.1X is used, this flag is
+ * controlling whether STA is authorized to
+ * send and receive non-IEEE 802.1X frames
+ */
+#define WLAN_STA_SHORT_PREAMBLE BIT(7)
+#define WLAN_STA_WME BIT(9)
+#define WLAN_STA_WDS BIT(27)
+
+
+/* Per-station state. Entries are reference counted through @kref and are
+ * linked both into a list (@list) and into a hash chain keyed by the last
+ * octet of @addr (@hnext, see STA_HASH()). */
+struct sta_info {
+ struct kref kref;
+ struct list_head list;
+ struct sta_info *hnext; /* next entry in hash table list */
+
+ struct ieee80211_local *local;
+
+ u8 addr[ETH_ALEN];
+ u16 aid; /* STA's unique AID (1..2007), 0 = not yet assigned */
+ u32 flags; /* WLAN_STA_ */
+
+ struct sk_buff_head ps_tx_buf; /* buffer of TX frames for station in
+ * power saving state */
+ int pspoll; /* whether STA has sent a PS Poll frame */
+ struct sk_buff_head tx_filtered; /* buffer of TX frames that were
+ * already given to low-level driver,
+ * but were filtered */
+ int clear_dst_mask;
+
+ unsigned long rx_packets, tx_packets; /* number of RX/TX MSDUs */
+ unsigned long rx_bytes, tx_bytes;
+ unsigned long tx_retry_failed, tx_retry_count;
+ unsigned long tx_filtered_count;
+
+ unsigned int wep_weak_iv_count; /* number of RX frames with weak IV */
+
+ unsigned long last_rx;
+ u32 supp_rates; /* bitmap of supported rates in local->curr_rates */
+ int txrate; /* index in local->curr_rates */
+ int last_txrate; /* last rate used to send a frame to this STA */
+ int last_nonerp_idx;
+
+ struct net_device *dev; /* which net device is this station associated
+ * to */
+
+ struct ieee80211_key *key;
+
+ u32 tx_num_consecutive_failures;
+ u32 tx_num_mpdu_ok;
+ u32 tx_num_mpdu_fail;
+
+ struct rate_control_ref *rate_ctrl;
+ void *rate_ctrl_priv;
+
+ /* last received seq/frag number from this STA (per RX queue) */
+ __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
+ unsigned long num_duplicates; /* number of duplicate frames received
+ * from this STA */
+ unsigned long tx_fragments; /* number of transmitted MPDUs */
+ unsigned long rx_fragments; /* number of received MPDUs */
+ unsigned long rx_dropped; /* number of dropped MPDUs from this STA */
+
+ int last_rssi; /* RSSI of last received frame from this STA */
+ int last_signal; /* signal of last received frame from this STA */
+ int last_noise; /* noise of last received frame from this STA */
+ int last_ack_rssi[3]; /* RSSI of last received ACKs from this STA */
+ unsigned long last_ack;
+ int channel_use;
+ int channel_use_raw;
+
+ u8 antenna_sel_tx;
+ u8 antenna_sel_rx;
+
+
+ int key_idx_compression; /* key table index for compression and TX
+ * filtering; used only if sta->key is not
+ * set */
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ int debugfs_registered;
+#endif
+ int assoc_ap; /* whether this is an AP that we are
+ * associated with as a client */
+
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+ unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES];
+ unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES];
+#endif /* CONFIG_MAC80211_DEBUG_COUNTERS */
+
+ int vlan_id;
+
+ u16 listen_interval;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ /* dentries of the per-STA debugfs directory and its files */
+ struct sta_info_debugfsdentries {
+ struct dentry *dir;
+ struct dentry *flags;
+ struct dentry *num_ps_buf_frames;
+ struct dentry *last_ack_rssi;
+ struct dentry *last_ack_ms;
+ struct dentry *inactive_ms;
+ struct dentry *last_seq_ctrl;
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+ struct dentry *wme_rx_queue;
+ struct dentry *wme_tx_queue;
+#endif
+ } debugfs;
+#endif
+};
+
+
+/* Maximum number of concurrently registered stations */
+#define MAX_STA_COUNT 2007
+
+#define STA_HASH_SIZE 256
+#define STA_HASH(sta) (sta[5])
+
+
+/* Maximum number of frames to buffer per power saving station */
+#define STA_MAX_TX_BUFFER 128
+
+/* Minimum buffered frame expiry time. If STA uses listen interval that is
+ * smaller than this value, the minimum value here is used instead. */
+#define STA_TX_BUFFER_EXPIRE (10 * HZ)
+
+/* How often station data is cleaned up (e.g., expiration of buffered frames)
+ */
+#define STA_INFO_CLEANUP_INTERVAL (10 * HZ)
+
+struct sta_info * sta_info_get(struct ieee80211_local *local, u8 *addr);
+int sta_info_min_txrate_get(struct ieee80211_local *local);
+void sta_info_put(struct sta_info *sta);
+struct sta_info * sta_info_add(struct ieee80211_local *local,
+ struct net_device *dev, u8 *addr, gfp_t gfp);
+void sta_info_free(struct sta_info *sta, int locked);
+void sta_info_init(struct ieee80211_local *local);
+int sta_info_start(struct ieee80211_local *local);
+void sta_info_stop(struct ieee80211_local *local);
+void sta_info_remove_aid_ptr(struct sta_info *sta);
+void sta_info_flush(struct ieee80211_local *local, struct net_device *dev);
+
+#endif /* STA_INFO_H */
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
new file mode 100644
index 000000000000..41621720e560
--- /dev/null
+++ b/net/mac80211/tkip.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2002-2004, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_key.h"
+#include "tkip.h"
+#include "wep.h"
+
+
+/* TKIP key mixing functions */
+
+
+#define PHASE1_LOOP_COUNT 8
+
+
+/* 2-byte by 2-byte subset of the full AES S-box table. Only the first part
+ * is stored; the second part is identical but byte-swapped (see tkip_S()) */
+static const u16 tkip_sbox[256] =
+{
+ 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154,
+ 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A,
+ 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B,
+ 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B,
+ 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F,
+ 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F,
+ 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5,
+ 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F,
+ 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB,
+ 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397,
+ 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED,
+ 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A,
+ 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194,
+ 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3,
+ 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104,
+ 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D,
+ 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39,
+ 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695,
+ 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83,
+ 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76,
+ 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4,
+ 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B,
+ 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0,
+ 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018,
+ 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751,
+ 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85,
+ 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12,
+ 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9,
+ 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7,
+ 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A,
+ 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8,
+ 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A,
+};
+
+
+/* Build a 16-bit word with @x as the high octet and @y as the low octet. */
+static inline u16 Mk16(u8 x, u8 y)
+{
+	return (u16) ((x << 8) | y);
+}
+
+
+/* High octet of a 16-bit word. */
+static inline u8 Hi8(u16 v)
+{
+	return (u8) (v >> 8);
+}
+
+
+/* Low octet of a 16-bit word. */
+static inline u8 Lo8(u16 v)
+{
+	return (u8) v;
+}
+
+
+/* Upper 16 bits of a 32-bit word. */
+static inline u16 Hi16(u32 v)
+{
+	return (u16) (v >> 16);
+}
+
+
+/* Lower 16 bits of a 32-bit word. */
+static inline u16 Lo16(u32 v)
+{
+	return (u16) v;
+}
+
+
+/* Rotate a 16-bit word right by one bit. */
+static inline u16 RotR1(u16 v)
+{
+	/* bit 0 wraps around to bit 15; the cast drops what was shifted
+	 * above bit 15 */
+	return (u16) ((v >> 1) | (v << 15));
+}
+
+
+/* 16-bit S-box lookup: the entry selected by the low octet of @val is XORed
+ * with the byte-swapped entry selected by the high octet. */
+static inline u16 tkip_S(u16 val)
+{
+	u16 hi_entry = tkip_sbox[Hi8(val)];
+	u16 swapped = Mk16(Lo8(hi_entry), Hi8(hi_entry));
+
+	return tkip_sbox[Lo8(val)] ^ swapped;
+}
+
+
+
+/* P1K := Phase1(TA, TK, TSC)
+ * TA = transmitter address (48 bits)
+ * TK = dot11DefaultKeyValue or dot11KeyMappingValue (128 bits)
+ * TSC = TKIP sequence counter (48 bits, only 32 msb bits used)
+ * P1K: 80 bits
+ *
+ * @ta must point to 6 octets and @tk to 16 octets; the result is written to
+ * the five 16-bit words at @p1k. @tsc_IV32 holds the 32 msb bits of the TSC.
+ */
+static void tkip_mixing_phase1(const u8 *ta, const u8 *tk, u32 tsc_IV32,
+ u16 *p1k)
+{
+ int i, j;
+
+ /* initial state: IV32 followed by the address, as 16-bit words with
+ * the lower-indexed octet in the low half */
+ p1k[0] = Lo16(tsc_IV32);
+ p1k[1] = Hi16(tsc_IV32);
+ p1k[2] = Mk16(ta[1], ta[0]);
+ p1k[3] = Mk16(ta[3], ta[2]);
+ p1k[4] = Mk16(ta[5], ta[4]);
+
+ for (i = 0; i < PHASE1_LOOP_COUNT; i++) {
+ /* 0 in even rounds, 2 in odd rounds: selects which TK octet
+ * pairs are mixed in */
+ j = 2 * (i & 1);
+ p1k[0] += tkip_S(p1k[4] ^ Mk16(tk[ 1 + j], tk[ 0 + j]));
+ p1k[1] += tkip_S(p1k[0] ^ Mk16(tk[ 5 + j], tk[ 4 + j]));
+ p1k[2] += tkip_S(p1k[1] ^ Mk16(tk[ 9 + j], tk[ 8 + j]));
+ p1k[3] += tkip_S(p1k[2] ^ Mk16(tk[13 + j], tk[12 + j]));
+ p1k[4] += tkip_S(p1k[3] ^ Mk16(tk[ 1 + j], tk[ 0 + j])) + i;
+ }
+}
+
+
+/* Phase 2 key mixing: derive the 16-octet per-packet RC4 key @rc4key from
+ * the five-word phase 1 result @p1k, the 16-octet temporal key @tk and the
+ * low 16 bits of the sequence counter (@tsc_IV16). */
+static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16,
+ u8 *rc4key)
+{
+ u16 ppk[6];
+ int i;
+
+ /* work on a copy of P1K extended by one word that carries IV16 */
+ ppk[0] = p1k[0];
+ ppk[1] = p1k[1];
+ ppk[2] = p1k[2];
+ ppk[3] = p1k[3];
+ ppk[4] = p1k[4];
+ ppk[5] = p1k[4] + tsc_IV16;
+
+ /* S-box steps mix in TK octets 0..11 */
+ ppk[0] += tkip_S(ppk[5] ^ Mk16(tk[ 1], tk[ 0]));
+ ppk[1] += tkip_S(ppk[0] ^ Mk16(tk[ 3], tk[ 2]));
+ ppk[2] += tkip_S(ppk[1] ^ Mk16(tk[ 5], tk[ 4]));
+ ppk[3] += tkip_S(ppk[2] ^ Mk16(tk[ 7], tk[ 6]));
+ ppk[4] += tkip_S(ppk[3] ^ Mk16(tk[ 9], tk[ 8]));
+ ppk[5] += tkip_S(ppk[4] ^ Mk16(tk[11], tk[10]));
+ /* rotation steps; the first two mix in TK octets 12..15 */
+ ppk[0] += RotR1(ppk[5] ^ Mk16(tk[13], tk[12]));
+ ppk[1] += RotR1(ppk[0] ^ Mk16(tk[15], tk[14]));
+ ppk[2] += RotR1(ppk[1]);
+ ppk[3] += RotR1(ppk[2]);
+ ppk[4] += RotR1(ppk[3]);
+ ppk[5] += RotR1(ppk[4]);
+
+ /* key octets 0..2 are derived from IV16 only; octet 1 has bit 5
+ * forced on and bit 7 forced off */
+ rc4key[0] = Hi8(tsc_IV16);
+ rc4key[1] = (Hi8(tsc_IV16) | 0x20) & 0x7f;
+ rc4key[2] = Lo8(tsc_IV16);
+ rc4key[3] = Lo8((ppk[5] ^ Mk16(tk[1], tk[0])) >> 1);
+
+ /* key octets 4..15: the six PPK words, low octet first */
+ for (i = 0; i < 6; i++) {
+ rc4key[4 + 2 * i] = Lo8(ppk[i]);
+ rc4key[5 + 2 * i] = Hi8(ppk[i]);
+ }
+}
+
+
+/* Write the eight octets of TKIP IV and Ext. IV to @pos: @iv0, @iv1 and
+ * @iv2, then the key ID octet with the Ext IV bit set, then the IV32 of
+ * @key in little endian order. Returns a pointer to the octet following the
+ * IVs (i.e., the beginning of the packet payload). */
+u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
+			   u8 iv0, u8 iv1, u8 iv2)
+{
+	pos[0] = iv0;
+	pos[1] = iv1;
+	pos[2] = iv2;
+	pos[3] = (key->keyidx << 6) | (1 << 5) /* Ext IV */;
+	pos[4] = key->u.tkip.iv32 & 0xff;
+	pos[5] = (key->u.tkip.iv32 >> 8) & 0xff;
+	pos[6] = (key->u.tkip.iv32 >> 16) & 0xff;
+	pos[7] = (key->u.tkip.iv32 >> 24) & 0xff;
+
+	return pos + 8;
+}
+
+
+/* Run TKIP phase 1 key mixing for transmitter address @ta with the temporal
+ * encryption key and the current IV32 of @key. The five 16-bit result words
+ * are written to @phase1key; @key itself is not modified. */
+void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta,
+ u16 *phase1key)
+{
+ tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY],
+ key->u.tkip.iv32, phase1key);
+}
+
+/* Generate the 16-octet per-packet RC4 key for the current IV32/IV16 of
+ * @key and transmitter address @ta into @rc4key. The phase 1 result is
+ * cached in key->u.tkip.p1k and only recomputed on first use or when IV16
+ * is 0. */
+void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
+ u8 *rc4key)
+{
+ /* Calculate per-packet key */
+ if (key->u.tkip.iv16 == 0 || !key->u.tkip.tx_initialized) {
+ /* IV16 wrapped around - perform TKIP phase 1 */
+ tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY],
+ key->u.tkip.iv32, key->u.tkip.p1k);
+ key->u.tkip.tx_initialized = 1;
+ }
+
+ tkip_mixing_phase2(key->u.tkip.p1k, &key->key[ALG_TKIP_TEMP_ENCR_KEY],
+ key->u.tkip.iv16, rc4key);
+}
+
+/* Encrypt packet payload with TKIP using @key. @pos is a pointer to the
+ * beginning of the buffer containing payload. This payload must include
+ * headroom of eight octets for IV and Ext. IV and tailroom of four octets
+ * for ICV. @payload_len is the length of payload (_not_ including extra
+ * headroom and tailroom). @ta is the transmitter address. */
+void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
+ struct ieee80211_key *key,
+ u8 *pos, size_t payload_len, u8 *ta)
+{
+ u8 rc4key[16];
+
+ ieee80211_tkip_gen_rc4key(key, ta, rc4key);
+ /* the first three RC4 key octets double as the IV octets */
+ pos = ieee80211_tkip_add_iv(pos, key, rc4key[0], rc4key[1], rc4key[2]);
+ ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len);
+}
+
+
+/* Decrypt packet payload with TKIP using @key. @payload is a pointer to the
+ * beginning of the IEEE 802.11 frame payload, i.e., including IV, Ext. IV,
+ * real data, Michael MIC, ICV. @payload_len is the length of payload,
+ * including IV, Ext. IV, MIC, ICV. @ta is the transmitter address and @queue
+ * selects the per-queue RX state of @key. If @only_iv is set, only the IV
+ * checks are done and nothing is decrypted.
+ *
+ * Returns TKIP_DECRYPT_OK, one of the negative TKIP_DECRYPT_* codes, or the
+ * result of ieee80211_wep_decrypt_data(). A payload shorter than 12 octets
+ * yields -1. */
+int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
+				struct ieee80211_key *key,
+				u8 *payload, size_t payload_len, u8 *ta,
+				int only_iv, int queue)
+{
+	u32 iv32;
+	u32 iv16;
+	u8 rc4key[16], keyid, *pos = payload;
+	int res;
+
+	if (payload_len < 12)
+		return -1;
+
+	iv16 = (pos[0] << 8) | pos[2];
+	keyid = pos[3];
+	/* the top octet is widened first: shifting the promoted (signed) int
+	 * left by 24 would overflow for values >= 0x80 */
+	iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | ((u32) pos[7] << 24);
+	pos += 8;
+#ifdef CONFIG_TKIP_DEBUG
+	{
+		size_t i;
+		printk(KERN_DEBUG "TKIP decrypt: data(len=%zu)", payload_len);
+		for (i = 0; i < payload_len; i++)
+			printk(" %02x", payload[i]);
+		printk("\n");
+		printk(KERN_DEBUG "TKIP decrypt: iv16=%04x iv32=%08x\n",
+		       iv16, iv32);
+	}
+#endif /* CONFIG_TKIP_DEBUG */
+
+	if (!(keyid & (1 << 5)))
+		return TKIP_DECRYPT_NO_EXT_IV;
+
+	if ((keyid >> 6) != key->keyidx)
+		return TKIP_DECRYPT_INVALID_KEYIDX;
+
+	/* replay check: (IV32, IV16) has to be strictly larger than the last
+	 * pair that was accepted on this queue */
+	if (key->u.tkip.rx_initialized[queue] &&
+	    (iv32 < key->u.tkip.iv32_rx[queue] ||
+	     (iv32 == key->u.tkip.iv32_rx[queue] &&
+	      iv16 <= key->u.tkip.iv16_rx[queue]))) {
+#ifdef CONFIG_TKIP_DEBUG
+		printk(KERN_DEBUG "TKIP replay detected for RX frame from "
+		       MAC_FMT " (RX IV (%08x,%04x) <= prev. IV (%08x,%04x))\n",
+		       MAC_ARG(ta),
+		       iv32, iv16, key->u.tkip.iv32_rx[queue],
+		       key->u.tkip.iv16_rx[queue]);
+#endif /* CONFIG_TKIP_DEBUG */
+		return TKIP_DECRYPT_REPLAY;
+	}
+
+	if (only_iv) {
+		res = TKIP_DECRYPT_OK;
+		key->u.tkip.rx_initialized[queue] = 1;
+		goto done;
+	}
+
+	/* the cached phase 1 result only depends on IV32, so it is reused
+	 * until IV32 changes */
+	if (!key->u.tkip.rx_initialized[queue] ||
+	    key->u.tkip.iv32_rx[queue] != iv32) {
+		key->u.tkip.rx_initialized[queue] = 1;
+		/* IV16 wrapped around - perform TKIP phase 1 */
+		tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY],
+				   iv32, key->u.tkip.p1k_rx[queue]);
+#ifdef CONFIG_TKIP_DEBUG
+		{
+			int i;
+			printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=" MAC_FMT
+			       " TK=", MAC_ARG(ta));
+			for (i = 0; i < 16; i++)
+				printk("%02x ",
+				       key->key[ALG_TKIP_TEMP_ENCR_KEY + i]);
+			printk("\n");
+			printk(KERN_DEBUG "TKIP decrypt: P1K=");
+			for (i = 0; i < 5; i++)
+				printk("%04x ", key->u.tkip.p1k_rx[queue][i]);
+			printk("\n");
+		}
+#endif /* CONFIG_TKIP_DEBUG */
+	}
+
+	tkip_mixing_phase2(key->u.tkip.p1k_rx[queue],
+			   &key->key[ALG_TKIP_TEMP_ENCR_KEY],
+			   iv16, rc4key);
+#ifdef CONFIG_TKIP_DEBUG
+	{
+		int i;
+		printk(KERN_DEBUG "TKIP decrypt: Phase2 rc4key=");
+		for (i = 0; i < 16; i++)
+			printk("%02x ", rc4key[i]);
+		printk("\n");
+	}
+#endif /* CONFIG_TKIP_DEBUG */
+
+	/* 12 = the eight IV octets skipped above plus the four ICV octets */
+	res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12);
+ done:
+	if (res == TKIP_DECRYPT_OK) {
+		/* FIX: these should be updated only after Michael MIC has been
+		 * verified */
+		/* Record previously received IV */
+		key->u.tkip.iv32_rx[queue] = iv32;
+		key->u.tkip.iv16_rx[queue] = iv16;
+	}
+
+	return res;
+}
+
+
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
new file mode 100644
index 000000000000..a0d181a18049
--- /dev/null
+++ b/net/mac80211/tkip.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2002-2004, Instant802 Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef TKIP_H
+#define TKIP_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include "ieee80211_key.h"
+
+u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
+ u8 iv0, u8 iv1, u8 iv2);
+void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta,
+ u16 *phase1key);
+void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
+ u8 *rc4key);
+void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
+ struct ieee80211_key *key,
+ u8 *pos, size_t payload_len, u8 *ta);
+/* Result codes of ieee80211_tkip_decrypt_data() */
+enum {
+ TKIP_DECRYPT_OK = 0,
+ TKIP_DECRYPT_NO_EXT_IV = -1, /* Ext IV bit not set in key ID octet */
+ TKIP_DECRYPT_INVALID_KEYIDX = -2, /* key index differs from key->keyidx */
+ TKIP_DECRYPT_REPLAY = -3, /* IV not larger than last accepted IV */
+};
+int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
+ struct ieee80211_key *key,
+ u8 *payload, size_t payload_len, u8 *ta,
+ int only_iv, int queue);
+
+#endif /* TKIP_H */
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
new file mode 100644
index 000000000000..1ad3d75281cc
--- /dev/null
+++ b/net/mac80211/wep.c
@@ -0,0 +1,328 @@
+/*
+ * Software WEP encryption implementation
+ * Copyright 2002, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright 2003, Instant802 Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/compiler.h>
+#include <linux/crc32.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <asm/scatterlist.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "wep.h"
+
+
+/*
+ * Prepare the per-device WEP state: seed the IV counter with random bytes
+ * and allocate one "ecb(arc4)" block cipher transform each for TX and RX.
+ *
+ * Returns 0 on success, otherwise the negative errno reported by the crypto
+ * layer for the allocation that failed.  If the RX transform cannot be
+ * allocated the TX transform is released again before returning.
+ */
+int ieee80211_wep_init(struct ieee80211_local *local)
+{
+	int err;
+
+	/* start WEP IV from a random value */
+	get_random_bytes(&local->wep_iv, WEP_IV_LEN);
+
+	local->wep_tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0,
+						   CRYPTO_ALG_ASYNC);
+	if (IS_ERR(local->wep_tx_tfm))
+		/* hand the real cause to the caller, not a blanket -ENOMEM */
+		return PTR_ERR(local->wep_tx_tfm);
+
+	local->wep_rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0,
+						   CRYPTO_ALG_ASYNC);
+	if (IS_ERR(local->wep_rx_tfm)) {
+		err = PTR_ERR(local->wep_rx_tfm);
+		crypto_free_blkcipher(local->wep_tx_tfm);
+		return err;
+	}
+
+	return 0;
+}
+
+/* Release the TX and RX cipher transforms held in @local. */
+void ieee80211_wep_free(struct ieee80211_local *local)
+{
+	struct crypto_blkcipher *tx = local->wep_tx_tfm;
+	struct crypto_blkcipher *rx = local->wep_rx_tfm;
+
+	crypto_free_blkcipher(tx);
+	crypto_free_blkcipher(rx);
+}
+
+/*
+ * Return 1 if @iv is one of the IVs of the form (KeyByte + 3, 0xff, N) that
+ * Fluhrer, Mantin and Shamir showed to speed up attacks on the RC4 key
+ * schedule, 0 otherwise.  @keylen is the length of the secret key in bytes.
+ */
+static inline int ieee80211_wep_weak_iv(u32 iv, int keylen)
+{
+	u8 first;
+
+	/* only IVs whose middle byte is 0xff can be weak */
+	if ((iv & 0xff00) != 0xff00)
+		return 0;
+
+	/* weak when the first byte selects one of the key bytes */
+	first = (iv >> 16) & 0xff;
+	return first >= 3 && first < 3 + keylen;
+}
+
+
+/*
+ * Advance the device-wide IV counter, stepping over a weak IV if the
+ * increment landed on one.  When @iv is non-NULL the 24-bit IV (most
+ * significant byte first) followed by the key index byte is stored there,
+ * so @iv must then have room for four bytes.
+ */
+void ieee80211_wep_get_iv(struct ieee80211_local *local,
+			  struct ieee80211_key *key, u8 *iv)
+{
+	local->wep_iv++;
+	if (ieee80211_wep_weak_iv(local->wep_iv, key->keylen))
+		local->wep_iv += 0x0100;
+
+	if (iv) {
+		iv[0] = (local->wep_iv >> 16) & 0xff;
+		iv[1] = (local->wep_iv >> 8) & 0xff;
+		iv[2] = local->wep_iv & 0xff;
+		/* key index lives in the two top bits of the fourth byte */
+		iv[3] = key->keyidx << 6;
+	}
+}
+
+
+/*
+ * Mark the frame as protected and open a WEP_IV_LEN byte gap between the
+ * 802.11 header and the payload, filling it with a fresh IV.  The skb is
+ * expanded first if it lacks headroom for the IV or tailroom for the ICV.
+ *
+ * Returns a pointer to the inserted IV, or NULL if the skb could not be
+ * expanded.
+ */
+u8 *ieee80211_wep_add_iv(struct ieee80211_local *local,
+			 struct sk_buff *skb,
+			 struct ieee80211_key *key)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	u16 fc = le16_to_cpu(hdr->frame_control) | IEEE80211_FCTL_PROTECTED;
+	int need_room, hdrlen;
+	u8 *start, *iv_pos;
+
+	hdr->frame_control = cpu_to_le16(fc);
+
+	need_room = skb_headroom(skb) < WEP_IV_LEN ||
+		    skb_tailroom(skb) < WEP_ICV_LEN;
+	if (need_room) {
+		I802_DEBUG_INC(local->tx_expand_skb_head);
+		if (unlikely(pskb_expand_head(skb, WEP_IV_LEN, WEP_ICV_LEN,
+					      GFP_ATOMIC)))
+			return NULL;
+	}
+
+	hdrlen = ieee80211_get_hdrlen(fc);
+
+	/* slide the header forward so the gap ends up right behind it */
+	start = skb_push(skb, WEP_IV_LEN);
+	memmove(start, start + WEP_IV_LEN, hdrlen);
+
+	iv_pos = start + hdrlen;
+	ieee80211_wep_get_iv(local, key, iv_pos);
+	return iv_pos;
+}
+
+
+/*
+ * Undo ieee80211_wep_add_iv(): move the 802.11 header WEP_IV_LEN bytes
+ * towards the payload (overwriting the IV) and pull the now unused bytes
+ * off the front of the skb.  @local and @key are not used.
+ */
+void ieee80211_wep_remove_iv(struct ieee80211_local *local,
+			     struct sk_buff *skb,
+			     struct ieee80211_key *key)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	int hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control));
+
+	memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen);
+	skb_pull(skb, WEP_IV_LEN);
+}
+
+
+/*
+ * WEP-encrypt @data in place with the RC4 key @rc4key of @klen bytes.
+ *
+ * The CRC32 of the @data_len payload bytes is appended as ICV and both are
+ * encrypted together: data = RC4(data | CRC32(data)).  The buffer therefore
+ * needs WEP_ICV_LEN bytes of room behind the payload, and @data_len must not
+ * count them.  The IV is _not_ added by this function.
+ */
+void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
+				size_t klen, u8 *data, size_t data_len)
+{
+	struct blkcipher_desc desc = { .tfm = tfm };
+	struct scatterlist sg;
+	__le32 *icv = (__le32 *)(data + data_len);
+
+	/* ICV is the little-endian CRC32 of the plaintext */
+	*icv = cpu_to_le32(~crc32_le(~0, data, data_len));
+
+	sg.page = virt_to_page(data);
+	sg.offset = offset_in_page(data);
+	sg.length = data_len + WEP_ICV_LEN;
+
+	crypto_blkcipher_setkey(tfm, rc4key, klen);
+	crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length);
+}
+
+
+/*
+ * WEP-encrypt the frame in @skb.
+ *
+ * Four bytes (IV + key index) are inserted behind the 802.11 header and four
+ * bytes (ICV) are appended, so the frame grows by eight bytes:
+ *
+ *   IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
+ *
+ * Returns 0 on success and -1 if @key is missing or not a WEP key, or if
+ * memory for the RC4 key or for expanding the skb could not be obtained.
+ */
+int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb,
+			  struct ieee80211_key *key)
+{
+	u8 *rc4key, *iv;
+	size_t len;
+	u32 klen;
+	int ret = -1;
+
+	if (!key || key->alg != ALG_WEP)
+		return -1;
+
+	/* RC4 key = 24-bit IV followed by the secret key */
+	klen = 3 + key->keylen;
+	rc4key = kmalloc(klen, GFP_ATOMIC);
+	if (!rc4key)
+		return -1;
+
+	iv = ieee80211_wep_add_iv(local, skb, key);
+	if (!iv)
+		goto out;
+
+	/* everything behind the IV is payload */
+	len = skb->len - (iv + WEP_IV_LEN - skb->data);
+
+	memcpy(rc4key, iv, 3);
+	memcpy(rc4key + 3, key->key, key->keylen);
+
+	/* make room for the ICV written by the data helper */
+	skb_put(skb, WEP_ICV_LEN);
+
+	ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, klen,
+				   iv + WEP_IV_LEN, len);
+	ret = 0;
+out:
+	kfree(rc4key);
+	return ret;
+}
+
+
+/*
+ * WEP-decrypt @data in place with the RC4 key @rc4key of @klen bytes.
+ *
+ * @data holds the encrypted payload followed by the 4-byte ICV, but not the
+ * IV; @data_len counts the payload only.  After decryption the CRC32 of the
+ * payload is compared with the decrypted ICV.
+ *
+ * Returns 0 if the ICV matches and -1 on mismatch.
+ */
+int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
+			       size_t klen, u8 *data, size_t data_len)
+{
+	struct blkcipher_desc desc = { .tfm = tfm };
+	struct scatterlist sg;
+	__le32 expected;
+
+	sg.page = virt_to_page(data);
+	sg.offset = offset_in_page(data);
+	sg.length = data_len + WEP_ICV_LEN;
+
+	crypto_blkcipher_setkey(tfm, rc4key, klen);
+	crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length);
+
+	expected = cpu_to_le32(~crc32_le(~0, data, data_len));
+
+	/* non-zero memcmp() means ICV mismatch */
+	return memcmp(&expected, data + data_len, WEP_ICV_LEN) ? -1 : 0;
+}
+
+
+/*
+ * WEP-decrypt the frame in @skb.
+ *
+ * The skb holds the 802.11 header followed by the whole WEP part: IV
+ * (4 bytes), encrypted payload (including SNAP header) and ICV (4 bytes);
+ * skb->len covers all of it.
+ *
+ * Returns -1 without touching the skb if the frame is not marked protected,
+ * is too short, does not match @key, or no memory is available.  Once
+ * decryption has been attempted the IV and ICV are stripped in either case
+ * and the return value is 0 for a correct ICV, -1 for a wrong one.
+ */
+int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb,
+			  struct ieee80211_key *key)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	u16 fc = le16_to_cpu(hdr->frame_control);
+	u8 *rc4key, *iv;
+	u8 keyidx;
+	u32 klen;
+	int hdrlen;
+	size_t len;
+	int ret = 0;
+
+	if (!(fc & IEEE80211_FCTL_PROTECTED))
+		return -1;
+
+	hdrlen = ieee80211_get_hdrlen(fc);
+
+	/* need at least IV and ICV behind the header */
+	if (skb->len < 8 + hdrlen)
+		return -1;
+
+	len = skb->len - hdrlen - 8;
+	iv = skb->data + hdrlen;
+
+	/* key index sits in the top two bits of the fourth IV byte */
+	keyidx = iv[3] >> 6;
+	if (!key || keyidx != key->keyidx || key->alg != ALG_WEP)
+		return -1;
+
+	klen = 3 + key->keylen;
+	rc4key = kmalloc(klen, GFP_ATOMIC);
+	if (!rc4key)
+		return -1;
+
+	/* RC4 key = 24-bit IV followed by the secret key */
+	memcpy(rc4key, iv, 3);
+	memcpy(rc4key + 3, key->key, key->keylen);
+
+	if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen,
+				       iv + WEP_IV_LEN, len)) {
+		printk(KERN_DEBUG "WEP decrypt failed (ICV)\n");
+		ret = -1;
+	}
+
+	kfree(rc4key);
+
+	/* drop the ICV from the tail ... */
+	skb_trim(skb, skb->len - WEP_ICV_LEN);
+
+	/* ... and the IV from behind the header */
+	memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen);
+	skb_pull(skb, WEP_IV_LEN);
+
+	return ret;
+}
+
+
+/*
+ * Extract the key index (0..3) from the IV of a protected frame.
+ * Returns -1 if the frame is not marked protected or is too short to carry
+ * IV and ICV.
+ */
+int ieee80211_wep_get_keyidx(struct sk_buff *skb)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	u16 fc = le16_to_cpu(hdr->frame_control);
+	int hdrlen;
+
+	if (!(fc & IEEE80211_FCTL_PROTECTED))
+		return -1;
+
+	hdrlen = ieee80211_get_hdrlen(fc);
+	if (skb->len < 8 + hdrlen)
+		return -1;
+
+	/* top two bits of the byte following the 24-bit IV */
+	return skb->data[hdrlen + 3] >> 6;
+}
+
+
+/*
+ * Check whether a received protected frame was sent with a weak IV for
+ * @key (see ieee80211_wep_weak_iv()).
+ *
+ * Returns a pointer to the IV inside the skb if it is weak.  Returns NULL if
+ * the IV is not weak, the frame is not marked protected, or the frame is too
+ * short to contain an IV at all.
+ */
+u8 *ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	u16 fc;
+	int hdrlen;
+	u8 *ivpos;
+	u32 iv;
+
+	fc = le16_to_cpu(hdr->frame_control);
+	if (!(fc & IEEE80211_FCTL_PROTECTED))
+		return NULL;
+
+	hdrlen = ieee80211_get_hdrlen(fc);
+
+	/* never read IV bytes that lie beyond the end of the frame */
+	if (skb->len < hdrlen + WEP_IV_LEN)
+		return NULL;
+
+	ivpos = skb->data + hdrlen;
+	iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2];
+
+	if (ieee80211_wep_weak_iv(iv, key->keylen))
+		return ivpos;
+
+	return NULL;
+}
diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h
new file mode 100644
index 000000000000..bfe29e8e10aa
--- /dev/null
+++ b/net/mac80211/wep.h
@@ -0,0 +1,40 @@
+/*
+ * Software WEP encryption implementation
+ * Copyright 2002, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright 2003, Instant802 Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef WEP_H
+#define WEP_H
+
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include "ieee80211_i.h"
+#include "ieee80211_key.h"
+
+/* set-up and tear-down of the per-device cipher transforms */
+int ieee80211_wep_init(struct ieee80211_local *local);
+void ieee80211_wep_free(struct ieee80211_local *local);
+
+/* IV handling */
+void ieee80211_wep_get_iv(struct ieee80211_local *local,
+			  struct ieee80211_key *key, u8 *iv);
+u8 *ieee80211_wep_add_iv(struct ieee80211_local *local,
+			 struct sk_buff *skb,
+			 struct ieee80211_key *key);
+void ieee80211_wep_remove_iv(struct ieee80211_local *local,
+			     struct sk_buff *skb,
+			     struct ieee80211_key *key);
+
+/* raw buffer encryption/decryption with an explicit RC4 key */
+void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
+				size_t klen, u8 *data, size_t data_len);
+int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
+			       size_t klen, u8 *data, size_t data_len);
+
+/* whole-frame encryption/decryption */
+int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb,
+			  struct ieee80211_key *key);
+int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb,
+			  struct ieee80211_key *key);
+
+/* inspection of received frames */
+int ieee80211_wep_get_keyidx(struct sk_buff *skb);
+u8 *ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key);
+
+#endif /* WEP_H */
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
new file mode 100644
index 000000000000..89ce81529694
--- /dev/null
+++ b/net/mac80211/wme.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright 2004, Instant802 Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/pkt_sched.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "wme.h"
+
+/*
+ * Non-zero if the (CPU byte order) frame control value @fc has the bit
+ * pattern 0x88 under the mask 0x8C, which this file treats as "QoS data".
+ */
+static inline int WLAN_FC_IS_QOS_DATA(u16 fc)
+{
+	const u16 mask = 0x8C;
+	const u16 qos_data = 0x88;
+
+	return (fc & mask) == qos_data;
+}
+
+
+/*
+ * RX handler: determine the TID of a received frame and record it as the
+ * RX queue number and (for TIDs 0..7) as skb->priority.
+ *
+ * QoS data frames carry the TID in their QoS control field, management
+ * frames get the last RX queue, and everything else is TID 0.
+ * Always returns TXRX_CONTINUE.
+ */
+ieee80211_txrx_result
+ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx)
+{
+	int tid;
+
+	if (WLAN_FC_IS_QOS_DATA(rx->fc)) {
+		/* QoS control is the last field of the header */
+		u8 *qc = rx->skb->data + ieee80211_get_hdrlen(rx->fc) -
+			 QOS_CONTROL_LEN;
+
+		tid = qc[0] & QOS_CONTROL_TID_MASK;
+	} else if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) ==
+			    IEEE80211_FTYPE_MGMT)) {
+		/* Separate TID for management frames */
+		tid = NUM_RX_DATA_QUEUES - 1;
+	} else {
+		/* no qos control present: 802.1d - Best Effort */
+		tid = 0;
+	}
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
+	I802_DEBUG_INC(rx->local->wme_rx_queue[tid]);
+	if (rx->sta) {
+		I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]);
+	}
+#endif /* CONFIG_MAC80211_DEBUG_COUNTERS */
+
+	rx->u.rx.queue = tid;
+
+	/* Set skb->priority to 1d tag if highest order bit of TID is not set.
+	 * For now, set skb->priority to 0 for other cases. */
+	if (tid > 7)
+		rx->skb->priority = 0;
+	else
+		rx->skb->priority = tid;
+
+	return TXRX_CONTINUE;
+}
+
+
+/*
+ * RX handler: strip the 2-byte QoS control field from a QoS data frame and
+ * clear the QoS subtype bit in both rx->fc and the frame's own frame control
+ * field.  Other frames pass through untouched.
+ * Always returns TXRX_CONTINUE.
+ */
+ieee80211_txrx_result
+ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_hdr *hdr;
+	u8 *data = rx->skb->data;
+	u16 fc = rx->fc;
+
+	if (!WLAN_FC_IS_QOS_DATA(fc))
+		return TXRX_CONTINUE;
+
+	/* shift the header (minus QoS control) two bytes towards the payload
+	 * and drop the two bytes freed at the front */
+	memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2);
+	hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2);
+
+	/* change frame type to non QOS */
+	fc &= ~IEEE80211_STYPE_QOS_DATA;
+	rx->fc = fc;
+	hdr->frame_control = cpu_to_le16(fc);
+
+	return TXRX_CONTINUE;
+}
+
+
+#ifdef CONFIG_NET_SCHED
+/* upper bound on the number of hardware queues this qdisc can serve */
+#define TC_80211_MAX_QUEUES 8
+
+/* private data of the "ieee80211" root qdisc */
+struct ieee80211_sched_data {
+	/* user supplied tc filters, consulted first when classifying */
+	struct tcf_proto *filter_list;
+	/* one child qdisc per hardware queue */
+	struct Qdisc *queues[TC_80211_MAX_QUEUES];
+	/* per hardware queue list of frames handed back for requeueing */
+	struct sk_buff_head requeued[TC_80211_MAX_QUEUES];
+};
+
+
+/*
+ * Work out the 802.1p/1d tag for a data frame.  Sources, in order:
+ *  1. the user's tc filter list, if it yields a class,
+ *  2. skb->priority values 256..263, which directly encode tags 0..7,
+ *  3. the top three bits of the IPv4 TOS byte, provided bits 2..4 are clear.
+ * Anything else is tagged 0.
+ */
+static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct tcf_result res = { -1, 0 };
+	struct iphdr *ip;
+	int offset;
+	int dscp;
+
+	/* if there is a user set filter list, call out to that */
+	if (q->filter_list) {
+		tc_classify(skb, q->filter_list, &res);
+		if (res.class != -1)
+			return res.class;
+	}
+
+	/* skb->priority values from 256->263 are magic values to
+	 * directly indicate a specific 802.1d priority.
+	 * This is used to allow 802.1d priority to be passed directly in
+	 * from VLAN tags, etc. */
+	if (skb->priority >= 256 && skb->priority <= 263)
+		return skb->priority - 256;
+
+	/* the IP header follows the 802.11 header and 8 bytes LLC + proto */
+	offset = ieee80211_get_hdrlen_from_skb(skb) + 8;
+	if (skb->protocol != __constant_htons(ETH_P_IP) ||
+	    skb->len < offset + sizeof(*ip))
+		return 0;
+
+	ip = (struct iphdr *) (skb->data + offset);
+	dscp = ip->tos & 0xfc;
+
+	return (dscp & 0x1c) ? 0 : dscp >> 5;
+}
+
+
+/*
+ * Lower skb->priority to a tag of the next lower access category:
+ * VO (6, 7) -> 5, VI (4, 5) -> 3, BE (0, 3) -> 2.
+ * Returns 0 after a downgrade and -1 (priority untouched) for any other
+ * value, i.e. when nothing lower is left.
+ */
+static inline int wme_downgrade_ac(struct sk_buff *skb)
+{
+	int lower;
+
+	switch (skb->priority) {
+	case 6:
+	case 7:
+		lower = 5;	/* VO -> VI */
+		break;
+	case 4:
+	case 5:
+		lower = 3;	/* VI -> BE */
+		break;
+	case 0:
+	case 3:
+		lower = 2;	/* BE -> BK */
+		break;
+	default:
+		return -1;
+	}
+
+	skb->priority = lower;
+	return 0;
+}
+
+
+/*
+ * Pick the TX queue for a frame and set skb->priority accordingly.
+ *
+ * A non-negative return value is the queue to use; a negative return value
+ * means the frame has to be dropped.
+ */
+static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd)
+{
+	/* 802.1d tag -> queue lookup table */
+	static const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 };
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_tx_packet_data *pkt_data =
+		(struct ieee80211_tx_packet_data *) skb->cb;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	unsigned short fc = le16_to_cpu(hdr->frame_control);
+
+	/* management frames go on AC_VO queue, but are sent
+	 * without QoS control fields */
+	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
+		return IEEE80211_TX_QUEUE_DATA0;
+
+	/* Data frames from hostapd (mainly, EAPOL) use AC_VO
+	 * and they will include QoS control fields if
+	 * the target STA is using WME. */
+	if (unlikely(pkt_data->mgmt_iface)) {
+		skb->priority = 7;
+		return ieee802_1d_to_ac[skb->priority];
+	}
+
+	/* not a QoS frame: tag 0 is required for correct WPA/11i MIC */
+	if (!(fc & IEEE80211_STYPE_QOS_DATA)) {
+		skb->priority = 0;
+		return ieee802_1d_to_ac[skb->priority];
+	}
+
+	/* use the data classifier to determine what 802.1d tag the
+	 * data frame has */
+	skb->priority = classify_1d(skb, qd);
+
+	/* in case we are a client verify acm is not set for this ac;
+	 * keep downgrading until an AC without acm is found */
+	while (unlikely(local->wmm_acm & BIT(skb->priority)))
+		if (wme_downgrade_ac(skb))
+			return -1;	/* no lower AC has acm=0: drop */
+
+	/* look up which queue to use for frames with this 1d tag */
+	return ieee802_1d_to_ac[skb->priority];
+}
+
+
+/*
+ * Qdisc enqueue operation.
+ *
+ * Frames flagged as requeued go straight onto the requeue list of the queue
+ * recorded in their control block.  All other frames are classified, get
+ * their QoS control field filled in (QoS data frames only) and are passed to
+ * the child qdisc of the selected queue.
+ *
+ * Returns 0 / NET_XMIT_SUCCESS when the frame was queued, NET_XMIT_DROP when
+ * classification asked for a drop (the skb is freed), or the child qdisc's
+ * error code.
+ */
+static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc *qd)
+{
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct ieee80211_tx_packet_data *pkt_data =
+		(struct ieee80211_tx_packet_data *) skb->cb;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	unsigned short fc = le16_to_cpu(hdr->frame_control);
+	struct Qdisc *child;
+	int queue, err;
+
+	if (pkt_data->requeue) {
+		skb_queue_tail(&q->requeued[pkt_data->queue], skb);
+		qd->q.qlen++;
+		return 0;
+	}
+
+	queue = classify80211(skb, qd);
+
+	/* now we know the 1d priority, fill in the QoS header if there is
+	 * one */
+	if (WLAN_FC_IS_QOS_DATA(fc)) {
+		u8 *qc = skb->data + ieee80211_get_hdrlen(fc) - 2;
+		u8 qos_hdr = skb->priority & QOS_CONTROL_TAG1D_MASK;
+
+		if (local->wifi_wme_noack_test)
+			qos_hdr |= QOS_CONTROL_ACK_POLICY_NOACK <<
+					QOS_CONTROL_ACK_POLICY_SHIFT;
+
+		/* qos header is 2 bytes, second reserved */
+		qc[0] = qos_hdr;
+		qc[1] = 0;
+	}
+
+	/* hardware has fewer queues than requested: silently fall back to
+	 * its last one */
+	if (unlikely(queue >= local->hw.queues))
+		queue = local->hw.queues - 1;
+
+	if (unlikely(queue < 0)) {
+		kfree_skb(skb);
+		qd->qstats.drops++;
+		return NET_XMIT_DROP;
+	}
+
+	/* remember the queue so a later requeue finds it again */
+	pkt_data->queue = (unsigned int) queue;
+	child = q->queues[queue];
+	err = child->enqueue(skb, child);
+	if (err != NET_XMIT_SUCCESS) {
+		qd->qstats.drops++;
+		return err;
+	}
+
+	qd->q.qlen++;
+	qd->bstats.bytes += skb->len;
+	qd->bstats.packets++;
+	return NET_XMIT_SUCCESS;
+}
+
+
+/*
+ * Qdisc requeue operation: hand the frame back to the child qdisc of the
+ * queue that was recorded in its control block at enqueue time.
+ * Returns 0 on success, otherwise the child's error code.
+ *
+ * TODO: clean up the cases where master_hard_start_xmit
+ * returns non 0 - it shouldn't ever do that. Once done we
+ * can remove this function
+ */
+static int wme_qdiscop_requeue(struct sk_buff *skb, struct Qdisc *qd)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct ieee80211_tx_packet_data *pkt_data =
+		(struct ieee80211_tx_packet_data *) skb->cb;
+	/* we recorded which queue to use earlier! */
+	struct Qdisc *child = q->queues[pkt_data->queue];
+	int err;
+
+	err = child->ops->requeue(skb, child);
+	if (err != 0) {
+		qd->qstats.drops++;
+		return err;
+	}
+
+	qd->q.qlen++;
+	return 0;
+}
+
+
+/*
+ * Qdisc dequeue operation: walk the hardware queues in numeric/priority
+ * order and return the first available frame, preferring requeued frames
+ * over the child qdisc of the same queue.  Queues whose XOFF or PENDING
+ * state bit is set are skipped.
+ *
+ * Returning NULL when all the h/w queues are full means we never need to
+ * call netif_stop_queue in the driver.
+ */
+static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc *qd)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_hw *hw = &local->hw;
+	struct sk_buff *skb;
+	int queue;
+
+	for (queue = 0; queue < hw->queues; queue++) {
+		/* see if there is room in this hardware queue */
+		if (test_bit(IEEE80211_LINK_STATE_XOFF,
+			     &local->state[queue]) ||
+		    test_bit(IEEE80211_LINK_STATE_PENDING,
+			     &local->state[queue]))
+			continue;
+
+		/* there is space - requeued frames go first */
+		skb = skb_dequeue(&q->requeued[queue]);
+		if (!skb) {
+			struct Qdisc *child = q->queues[queue];
+
+			skb = child->dequeue(child);
+		}
+
+		if (skb) {
+			qd->q.qlen--;
+			return skb;
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Qdisc reset operation: discard all requeued frames, reset every child
+ * qdisc and zero the queue length.
+ */
+static void wme_qdiscop_reset(struct Qdisc *qd)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	int nqueues = local->hw.queues;
+	int i;
+
+	/* QUESTION: should we have some hardware flush functionality here? */
+
+	for (i = 0; i < nqueues; i++) {
+		skb_queue_purge(&q->requeued[i]);
+		qdisc_reset(q->queues[i]);
+	}
+
+	qd->q.qlen = 0;
+}
+
+
+/*
+ * Qdisc destroy operation: tear down the filter chain, then purge the
+ * requeue list and destroy the child qdisc of every hardware queue, leaving
+ * noop_qdisc in each slot.
+ */
+static void wme_qdiscop_destroy(struct Qdisc *qd)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	int nqueues = local->hw.queues;
+	int i;
+
+	tcf_destroy_chain(q->filter_list);
+	q->filter_list = NULL;
+
+	for (i = 0; i < nqueues; i++) {
+		skb_queue_purge(&q->requeued[i]);
+		qdisc_destroy(q->queues[i]);
+		q->queues[i] = &noop_qdisc;
+	}
+}
+
+
+/*
+ * Called whenever parameters are updated on an existing qdisc.
+ *
+ * The options block is ignored for now - a static tag to queue mapping is
+ * always used - so this accepts anything and returns 0.
+ */
+static int wme_qdiscop_tune(struct Qdisc *qd, struct rtattr *opt)
+{
+	return 0;
+}
+
+
+/*
+ * Called during initial creation of the qdisc on a device.
+ *
+ * Refuses (-EINVAL) devices that are not mac80211 master devices, devices
+ * that already have a qdisc attached, and ingress qdiscs.  Otherwise one
+ * default pfifo child qdisc is created per hardware queue; a queue whose
+ * child cannot be created gets noop_qdisc and an error is logged.
+ *
+ * Returns the result of wme_qdiscop_tune() if options were passed in,
+ * 0 otherwise.
+ */
+static int wme_qdiscop_init(struct Qdisc *qd, struct rtattr *opt)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct net_device *dev = qd->dev;
+	struct ieee80211_local *local;
+	int queues;
+	int err = 0, i;
+
+	/* check that device is a mac80211 device */
+	if (!dev->ieee80211_ptr ||
+	    dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid)
+		return -EINVAL;
+
+	/* check this device is an ieee80211 master type device */
+	if (dev->type != ARPHRD_IEEE80211)
+		return -EINVAL;
+
+	/* check that there is no qdisc currently attached to device
+	 * this ensures that we will be the root qdisc. (I can't find a better
+	 * way to test this explicitly) */
+	if (dev->qdisc_sleeping != &noop_qdisc)
+		return -EINVAL;
+
+	if (qd->flags & TCQ_F_INGRESS)
+		return -EINVAL;
+
+	local = wdev_priv(dev->ieee80211_ptr);
+	queues = local->hw.queues;
+
+	/* if options were passed in, set them */
+	if (opt)
+		err = wme_qdiscop_tune(qd, opt);
+
+	/* create child queues */
+	for (i = 0; i < queues; i++) {
+		skb_queue_head_init(&q->requeued[i]);
+		q->queues[i] = qdisc_create_dflt(qd->dev, &pfifo_qdisc_ops,
+						 qd->handle);
+		if (!q->queues[i]) {
+			q->queues[i] = &noop_qdisc;
+			/* terminate the line so it is not merged with the
+			 * next kernel message */
+			printk(KERN_ERR "%s child qdisc %i creation failed\n",
+			       dev->name, i);
+		}
+	}
+
+	return err;
+}
+
+/*
+ * Qdisc dump operation.  No options are dumped yet (the tag to queue mapping
+ * is static), so the skb is left as it is and its current length returned.
+ *
+ * TODO: once a configurable mapping exists, emit it as TCA_OPTIONS here and
+ * trim the skb and return -1 if the attribute does not fit.
+ */
+static int wme_qdiscop_dump(struct Qdisc *qd, struct sk_buff *skb)
+{
+	return skb->len;
+}
+
+
+/*
+ * Class operation: replace the child qdisc of class @arg (1-based) by @new,
+ * or by noop_qdisc when @new is NULL.  The previous child is reset and
+ * handed back through @old.
+ * Returns 0, or -EINVAL if @arg does not name a hardware queue.
+ */
+static int wme_classop_graft(struct Qdisc *qd, unsigned long arg,
+			     struct Qdisc *new, struct Qdisc **old)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	unsigned long idx = arg - 1;
+
+	/* arg == 0 wraps around and is rejected here as well */
+	if (idx >= local->hw.queues)
+		return -EINVAL;
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(qd);
+	*old = q->queues[idx];
+	q->queues[idx] = new;
+	qdisc_reset(*old);
+	sch_tree_unlock(qd);
+
+	return 0;
+}
+
+
+/*
+ * Class operation: return the child qdisc of class @arg (1-based), or NULL
+ * if @arg does not name a hardware queue.
+ */
+static struct Qdisc *
+wme_classop_leaf(struct Qdisc *qd, unsigned long arg)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	unsigned long idx = arg - 1;
+
+	return (idx >= local->hw.queues) ? NULL : q->queues[idx];
+}
+
+
+/*
+ * Class operation: map @classid to the internal class handle, which is the
+ * minor number of the class id (1-based queue number).  Returns 0 when the
+ * minor number does not name a hardware queue.
+ */
+static unsigned long wme_classop_get(struct Qdisc *qd, u32 classid)
+{
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	unsigned long cl = TC_H_MIN(classid);
+
+	/* cl == 0 wraps around and is rejected too */
+	return (cl - 1 >= local->hw.queues) ? 0 : cl;
+}
+
+
+/* Class operation: binding a filter is the same lookup as wme_classop_get();
+ * @parent is not used. */
+static unsigned long wme_classop_bind(struct Qdisc *qd, unsigned long parent,
+				      u32 classid)
+{
+	unsigned long cl = wme_classop_get(qd, classid);
+
+	return cl;
+}
+
+
+/* Class operation: nothing to release; intentionally empty. */
+static void wme_classop_put(struct Qdisc *q, unsigned long cl)
+{
+	return;
+}
+
+
+/*
+ * Class operation: change parameters of class *@arg (1-based queue number).
+ * No parameters can be changed yet, so this only validates the class.
+ * Returns 0, or -ENOENT if *@arg does not name a hardware queue.
+ */
+static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent,
+			      struct rtattr **tca, unsigned long *arg)
+{
+	unsigned long cl = *arg;
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_hw *hw = &local->hw;
+
+	/* valid classes are 1..hw->queues; cl == 0 wraps and is rejected */
+	if (cl - 1 >= hw->queues)
+		return -ENOENT;
+
+	/* TODO: put code to program hardware queue parameters here,
+	 * to allow programming from tc command line */
+
+	return 0;
+}
+
+
+/*
+ * Class operation: delete class @cl (1-based queue number).
+ *
+ * We don't support deleting hardware queues, so this only validates the
+ * class; when we add WMM-SA support TSPECs may be deleted here.
+ * Returns 0, or -ENOENT if @cl does not name a hardware queue.
+ */
+static int wme_classop_delete(struct Qdisc *qd, unsigned long cl)
+{
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_hw *hw = &local->hw;
+
+	/* valid classes are 1..hw->queues; cl == 0 wraps and is rejected */
+	if (cl - 1 >= hw->queues)
+		return -ENOENT;
+	return 0;
+}
+
+
+/*
+ * Class operation: fill @tcm with the description of class @cl (1-based
+ * queue number): its handle, this qdisc as parent and the handle of the
+ * child qdisc attached to it.
+ * Returns 0, or -ENOENT if @cl does not name a hardware queue.
+ */
+static int wme_classop_dump_class(struct Qdisc *qd, unsigned long cl,
+				  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_hw *hw = &local->hw;
+
+	/* valid classes are 1..hw->queues; accepting hw->queues + 1 would
+	 * index one slot past the last child qdisc below */
+	if (cl - 1 >= hw->queues)
+		return -ENOENT;
+	tcm->tcm_handle = TC_H_MIN(cl);
+	tcm->tcm_parent = qd->handle;
+	tcm->tcm_info = q->queues[cl-1]->handle; /* do we need this? */
+	return 0;
+}
+
+
+/*
+ * Class operation: call arg->fn for every class (queue number + 1), after
+ * skipping the first arg->skip classes.  A negative callback result sets
+ * arg->stop and ends the walk; arg->count is advanced for every class that
+ * was skipped or visited successfully.
+ */
+static void wme_classop_walk(struct Qdisc *qd, struct qdisc_walker *arg)
+{
+	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	int nqueues = local->hw.queues;
+	int queue;
+
+	if (arg->stop)
+		return;
+
+	for (queue = 0; queue < nqueues; queue++) {
+		/* we should return classids for our internal queues here
+		 * as well as the external ones */
+		if (arg->count >= arg->skip &&
+		    arg->fn(qd, queue + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+
+/*
+ * Class operation: return the location of the filter chain.  Only the qdisc
+ * itself (@cl == 0) has one; individual classes yield NULL.
+ */
+static struct tcf_proto **wme_classop_find_tcf(struct Qdisc *qd,
+					       unsigned long cl)
+{
+	struct ieee80211_sched_data *q = qdisc_priv(qd);
+
+	return cl ? NULL : &q->filter_list;
+}
+
+
+/*
+ * This qdisc is classful (i.e. has classes, some of which may have leaf
+ * qdiscs attached); these are the operations on the classes.
+ */
+static struct Qdisc_class_ops class_ops = {
+	/* child qdisc handling */
+	.graft		= wme_classop_graft,
+	.leaf		= wme_classop_leaf,
+
+	/* class lookup and life cycle */
+	.get		= wme_classop_get,
+	.put		= wme_classop_put,
+	.change		= wme_classop_change,
+	.delete		= wme_classop_delete,
+	.walk		= wme_classop_walk,
+
+	/* filter attachment */
+	.tcf_chain	= wme_classop_find_tcf,
+	.bind_tcf	= wme_classop_bind,
+	.unbind_tcf	= wme_classop_put,
+
+	.dump		= wme_classop_dump_class,
+};
+
+
+/* operations of the "ieee80211" queueing discipline itself */
+static struct Qdisc_ops wme_qdisc_ops = {
+	.next		= NULL,
+	.cl_ops		= &class_ops,
+	.id		= "ieee80211",
+	.priv_size	= sizeof(struct ieee80211_sched_data),
+
+	/* data path */
+	.enqueue	= wme_qdiscop_enqueue,
+	.dequeue	= wme_qdiscop_dequeue,
+	.requeue	= wme_qdiscop_requeue,
+	/* drop not needed since we are always the root qdisc */
+	.drop		= NULL,
+
+	/* control path */
+	.init		= wme_qdiscop_init,
+	.reset		= wme_qdiscop_reset,
+	.destroy	= wme_qdiscop_destroy,
+	.change		= wme_qdiscop_tune,
+
+	.dump		= wme_qdiscop_dump,
+};
+
+
+/*
+ * Create the "ieee80211" qdisc for @dev and install it as the device's
+ * sleeping qdisc.  On failure an error is logged and the device is left
+ * unchanged.
+ */
+void ieee80211_install_qdisc(struct net_device *dev)
+{
+	struct Qdisc *root = qdisc_create_dflt(dev, &wme_qdisc_ops, TC_H_ROOT);
+
+	if (root == NULL) {
+		printk(KERN_ERR "%s: qdisc installation failed\n", dev->name);
+		return;
+	}
+
+	/* same handle as would be allocated by qdisc_alloc_handle() */
+	root->handle = 0x80010000;
+
+	qdisc_lock_tree(dev);
+	list_add_tail(&root->list, &dev->qdisc_list);
+	dev->qdisc_sleeping = root;
+	qdisc_unlock_tree(dev);
+}
+
+
+/* Non-zero if the sleeping qdisc of @dev is the "ieee80211" qdisc. */
+int ieee80211_qdisc_installed(struct net_device *dev)
+{
+	struct Qdisc *current_qdisc = dev->qdisc_sleeping;
+
+	return current_qdisc->ops == &wme_qdisc_ops;
+}
+
+
+/* Register the "ieee80211" qdisc; returns register_qdisc()'s result. */
+int ieee80211_wme_register(void)
+{
+	int err = register_qdisc(&wme_qdisc_ops);
+
+	return err;
+}
+
+
+/* Unregister the "ieee80211" qdisc; the result is not reported. */
+void ieee80211_wme_unregister(void)
+{
+	(void) unregister_qdisc(&wme_qdisc_ops);
+}
+#endif /* CONFIG_NET_SCHED */
diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h
new file mode 100644
index 000000000000..f0bff10f0e08
--- /dev/null
+++ b/net/mac80211/wme.h
@@ -0,0 +1,57 @@
+/*
+ * IEEE 802.11 driver (80211.o) - QoS datatypes
+ * Copyright 2004, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _WME_H
+#define _WME_H
+
+#include <linux/netdevice.h>
+#include "ieee80211_i.h"
+
+/* size in bytes of the QoS control field at the end of a QoS data header */
+#define QOS_CONTROL_LEN 2
+
+/* ack policy values, placed at QOS_CONTROL_ACK_POLICY_SHIFT in the first
+ * QoS control byte */
+#define QOS_CONTROL_ACK_POLICY_NORMAL 0
+#define QOS_CONTROL_ACK_POLICY_NOACK 1
+
+/* mask selecting the TID from the first QoS control byte on receive */
+#define QOS_CONTROL_TID_MASK 0x0f
+#define QOS_CONTROL_ACK_POLICY_SHIFT 5
+
+/* mask applied to skb->priority when writing the 802.1d tag on transmit */
+#define QOS_CONTROL_TAG1D_MASK 0x07
+
+ieee80211_txrx_result
+ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx);
+
+ieee80211_txrx_result
+ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx);
+
+#ifdef CONFIG_NET_SCHED
+
+/* implemented in wme.c on top of the packet scheduler */
+void ieee80211_install_qdisc(struct net_device *dev);
+int ieee80211_qdisc_installed(struct net_device *dev);
+int ieee80211_wme_register(void);
+void ieee80211_wme_unregister(void);
+
+#else /* !CONFIG_NET_SCHED */
+
+/* no packet scheduler: nothing is installed or registered, and the
+ * functions that report a result return 0 */
+static inline void ieee80211_install_qdisc(struct net_device *dev)
+{
+}
+
+static inline int ieee80211_qdisc_installed(struct net_device *dev)
+{
+	return 0;
+}
+
+static inline int ieee80211_wme_register(void)
+{
+	return 0;
+}
+
+static inline void ieee80211_wme_unregister(void)
+{
+}
+
+#endif /* CONFIG_NET_SCHED */
+
+#endif /* _WME_H */
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
new file mode 100644
index 000000000000..783af32c6911
--- /dev/null
+++ b/net/mac80211/wpa.c
@@ -0,0 +1,660 @@
+/*
+ * Copyright 2002-2004, Instant802 Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/compiler.h>
+#include <net/iw_handler.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_common.h"
+#include "ieee80211_i.h"
+#include "michael.h"
+#include "tkip.h"
+#include "aes_ccm.h"
+#include "wpa.h"
+
+/*
+ * ieee80211_get_hdr_info - pick addresses, TID and payload out of a frame
+ * @skb: frame whose data begins with the 802.11 header
+ * @sa: receives a pointer to the source address field
+ * @da: receives a pointer to the destination address field
+ * @qos_tid: receives the low four bits of the first QoS control byte with
+ *	0x80 or'ed in as a "QoS control present" flag, or 0 for other frames
+ * @data: receives a pointer to the first byte behind the header
+ * @data_len: receives the number of bytes behind the header
+ *
+ * The header length is 24 bytes, plus ETH_ALEN when both ToDS and FromDS
+ * are set (addr4 present), plus 2 when bit 0x80 of the frame control is set.
+ *
+ * Returns 0 on success and -1 if the frame is shorter than that header.
+ * The length is validated before any header field is read and before the
+ * payload length is computed, so a truncated frame neither causes a read
+ * past skb->len nor yields a wrapped-around *data_len.  On failure
+ * *qos_tid and *data_len are 0 and *data points at the start of the frame.
+ */
+static int ieee80211_get_hdr_info(const struct sk_buff *skb, u8 **sa, u8 **da,
+				  u8 *qos_tid, u8 **data, size_t *data_len)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	size_t hdrlen = 24;
+	int a4_included;
+	u16 fc;
+
+	/* Give every output a defined value for the early exits below. */
+	*sa = skb->data;
+	*da = skb->data;
+	*qos_tid = 0;
+	*data = skb->data;
+	*data_len = 0;
+
+	/* Not even the fixed part of the header (incl. frame control) fits. */
+	if (skb->len < hdrlen)
+		return -1;
+
+	fc = le16_to_cpu(hdr->frame_control);
+	a4_included = (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
+		(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS);
+
+	/* Only pointers are formed here; the address bytes are not read. */
+	if (a4_included) {
+		hdrlen += ETH_ALEN;
+		*sa = hdr->addr4;
+		*da = hdr->addr3;
+	} else if (fc & IEEE80211_FCTL_FROMDS) {
+		*sa = hdr->addr3;
+		*da = hdr->addr1;
+	} else if (fc & IEEE80211_FCTL_TODS) {
+		*sa = hdr->addr2;
+		*da = hdr->addr3;
+	} else {
+		*sa = hdr->addr2;
+		*da = hdr->addr1;
+	}
+
+	if (fc & 0x80)
+		hdrlen += 2;
+
+	/* Reject truncated frames before touching anything past 24 bytes. */
+	if (skb->len < hdrlen)
+		return -1;
+
+	*data = skb->data + hdrlen;
+	*data_len = skb->len - hdrlen;
+
+	if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
+	    fc & IEEE80211_STYPE_QOS_DATA) {
+		/*
+		 * QoS control sits where addr4 would start, or right behind
+		 * addr4 when that is present.
+		 * NOTE(review): the read below is covered by the length check
+		 * above provided IEEE80211_STYPE_QOS_DATA is the 0x80 bit
+		 * that extended hdrlen - confirm against the header.
+		 */
+		u8 *pos = (u8 *) &hdr->addr4;
+
+		if (a4_included)
+			pos += 6;
+		*qos_tid = (pos[0] & 0x0f) | 0x80; /* 0x80: qos_included flag */
+	}
+
+	return 0;
+}
+
+
+/*
+ * TX handler: append the Michael MIC to a TKIP-protected data frame.
+ *
+ * Frames without a TKIP key, shorter than 24 bytes or without a data
+ * payload are passed on untouched (TXRX_CONTINUE).  The same applies when
+ * the key is not forced to software, the frame is not fragmented and the
+ * hardware does not set IEEE80211_HW_TKIP_INCLUDE_MMIC.
+ *
+ * Otherwise MICHAEL_MIC_LEN bytes are appended to the skb and filled by
+ * michael_mic().  Returns TXRX_DROP if the header cannot be parsed or the
+ * skb cannot be grown, TXRX_CONTINUE on success.
+ */
+ieee80211_txrx_result
+ieee80211_tx_h_michael_mic_add(struct ieee80211_txrx_data *tx)
+{
+	u8 *data, *sa, *da, *key, *mic, qos_tid;
+	size_t data_len;
+	u16 fc;
+	struct sk_buff *skb = tx->skb;
+	int authenticator;
+	int wpa_test = 0;
+
+	fc = tx->fc;
+
+	if (!tx->key || tx->key->alg != ALG_TKIP || skb->len < 24 ||
+	    !WLAN_FC_DATA_PRESENT(fc))
+		return TXRX_CONTINUE;
+
+	if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len))
+		return TXRX_DROP;
+
+	if (!tx->key->force_sw_encrypt &&
+	    !tx->fragmented &&
+	    !(tx->local->hw.flags & IEEE80211_HW_TKIP_INCLUDE_MMIC) &&
+	    !wpa_test) {
+		/* hwaccel - with no need for preallocated room for Michael MIC
+		 */
+		return TXRX_CONTINUE;
+	}
+
+	if (skb_tailroom(skb) < MICHAEL_MIC_LEN) {
+		I802_DEBUG_INC(tx->local->tx_expand_skb_head);
+		if (unlikely(pskb_expand_head(skb, TKIP_IV_LEN,
+					      MICHAEL_MIC_LEN + TKIP_ICV_LEN,
+					      GFP_ATOMIC))) {
+			printk(KERN_DEBUG "%s: failed to allocate more memory "
+			       "for Michael MIC\n", tx->dev->name);
+			return TXRX_DROP;
+		}
+
+		/*
+		 * pskb_expand_head() may have moved the skb head, which
+		 * leaves sa/da/data pointing into the old buffer.  Derive
+		 * them again from the current skb->data before use.
+		 */
+		if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data,
+					   &data_len))
+			return TXRX_DROP;
+	}
+
+#if 0
+	authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */
+#else
+	authenticator = 1;
+#endif
+	key = &tx->key->key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY :
+			    ALG_TKIP_TEMP_AUTH_RX_MIC_KEY];
+	mic = skb_put(skb, MICHAEL_MIC_LEN);
+	/* only the TID nibble goes into the MIC; drop the 0x80 marker bit */
+	michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic);
+
+	return TXRX_CONTINUE;
+}
+
+/*
+ * RX handler: check the Michael MIC of a TKIP-protected data frame and strip
+ * it from the payload.
+ *
+ * Returns TXRX_CONTINUE when the check does not apply (hardware hides WEP or
+ * strips the MIC, no TKIP key, frame not protected or without data) and
+ * after the MIC has been trimmed off; TXRX_DROP for frames too short to hold
+ * a MIC and for MIC mismatches when !rx->u.rx.ra_match or there is no
+ * rx->local->apdev; TXRX_QUEUED once a mismatching frame has been passed to
+ * ieee80211_rx_mgmt() as ieee80211_msg_michael_mic_failure.
+ */
+ieee80211_txrx_result
+ieee80211_rx_h_michael_mic_verify(struct ieee80211_txrx_data *rx)
+{
+ u8 *data, *sa, *da, *key = NULL, qos_tid;
+ size_t data_len;
+ u16 fc;
+ u8 mic[MICHAEL_MIC_LEN]; /* MIC computed locally for the comparison */
+ struct sk_buff *skb = rx->skb;
+ int authenticator = 1, wpa_test = 0;
+
+ fc = rx->fc;
+
+ /* If device handles decryption totally, skip this check */
+ if ((rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) ||
+ (rx->local->hw.flags & IEEE80211_HW_DEVICE_STRIPS_MIC))
+ return TXRX_CONTINUE;
+
+ if (!rx->key || rx->key->alg != ALG_TKIP ||
+ !(rx->fc & IEEE80211_FCTL_PROTECTED) || !WLAN_FC_DATA_PRESENT(fc))
+ return TXRX_CONTINUE;
+
+ if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) &&
+ !rx->key->force_sw_encrypt) {
+ if (rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) {
+ if (skb->len < MICHAEL_MIC_LEN)
+ return TXRX_DROP;
+ }
+ /* Need to verify Michael MIC sometimes in software even when
+ * hwaccel is used. Atheros ar5212: fragmented frames and QoS
+ * frames. */
+ if (!rx->fragmented && !wpa_test)
+ goto remove_mic; /* skip the software check, just trim */
+ }
+
+ if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len)
+ || data_len < MICHAEL_MIC_LEN)
+ return TXRX_DROP;
+
+ data_len -= MICHAEL_MIC_LEN; /* received MIC sits at data + data_len */
+
+#if 0
+ authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */
+#else
+ authenticator = 1;
+#endif
+ key = &rx->key->key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY :
+ ALG_TKIP_TEMP_AUTH_TX_MIC_KEY];
+ michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic);
+ if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) {
+ if (!rx->u.rx.ra_match)
+ return TXRX_DROP;
+
+ printk(KERN_DEBUG "%s: invalid Michael MIC in data frame from "
+ MAC_FMT "\n", rx->dev->name, MAC_ARG(sa));
+
+ do { /* single pass; "break" only skips the wireless event */
+ struct ieee80211_hdr *hdr;
+ union iwreq_data wrqu;
+ char *buf = kmalloc(128, GFP_ATOMIC); /* event text, freed below */
+ if (!buf)
+ break;
+
+ /* TODO: needed parameters: count, key type, TSC */
+ hdr = (struct ieee80211_hdr *) skb->data;
+ sprintf(buf, "MLME-MICHAELMICFAILURE.indication("
+ "keyid=%d %scast addr=" MAC_FMT ")",
+ rx->key->keyidx,
+ hdr->addr1[0] & 0x01 ? "broad" : "uni",
+ MAC_ARG(hdr->addr2));
+ memset(&wrqu, 0, sizeof(wrqu));
+ wrqu.data.length = strlen(buf);
+ wireless_send_event(rx->dev, IWEVCUSTOM, &wrqu, buf);
+ kfree(buf);
+ } while (0);
+
+ if (!rx->local->apdev)
+ return TXRX_DROP;
+
+ ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status,
+ ieee80211_msg_michael_mic_failure);
+
+ return TXRX_QUEUED;
+ }
+
+ remove_mic:
+ /* remove Michael MIC from payload */
+ skb_trim(skb, skb->len - MICHAEL_MIC_LEN);
+
+ return TXRX_CONTINUE;
+}
+
+/*
+ * Make room for the TKIP IV between the 802.11 header and the payload of
+ * @skb, advance the key's iv16/iv32 counters and fill the IV in.
+ *
+ * When the key is not forced to software, only the IV is written and the
+ * phase 2 or phase 1 key is generated into the TX control block if the
+ * hardware flags ask for it.  Otherwise TKIP_ICV_LEN bytes are appended and
+ * ieee80211_tkip_encrypt_data() is run over the payload.
+ *
+ * Returns 0 on success, -1 if pskb_expand_head() fails.  @test is not used.
+ */
+static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx,
+ struct sk_buff *skb, int test)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_key *key = tx->key;
+ int hdrlen, len, tailneed;
+ u16 fc;
+ u8 *pos;
+
+ fc = le16_to_cpu(hdr->frame_control);
+ hdrlen = ieee80211_get_hdrlen(fc);
+ len = skb->len - hdrlen; /* payload length, taken before the IV is added */
+
+ tailneed = !tx->key->force_sw_encrypt ? 0 : TKIP_ICV_LEN;
+ if ((skb_headroom(skb) < TKIP_IV_LEN ||
+ skb_tailroom(skb) < tailneed)) {
+ I802_DEBUG_INC(tx->local->tx_expand_skb_head);
+ if (unlikely(pskb_expand_head(skb, TKIP_IV_LEN, tailneed,
+ GFP_ATOMIC)))
+ return -1;
+ }
+
+ pos = skb_push(skb, TKIP_IV_LEN);
+ memmove(pos, pos + TKIP_IV_LEN, hdrlen); /* slide the header to the new front */
+ pos += hdrlen; /* pos now addresses the gap left for the IV */
+
+ /* Increase IV for the frame */
+ key->u.tkip.iv16++;
+ if (key->u.tkip.iv16 == 0)
+ key->u.tkip.iv32++; /* iv16 wrapped: carry into iv32 */
+
+ if (!tx->key->force_sw_encrypt) {
+ u32 flags = tx->local->hw.flags;
+ hdr = (struct ieee80211_hdr *)skb->data; /* reload: skb->data changed above */
+
+ /* hwaccel - with preallocated room for IV */
+ ieee80211_tkip_add_iv(pos, key,
+ (u8) (key->u.tkip.iv16 >> 8),
+ (u8) (((key->u.tkip.iv16 >> 8) | 0x20) &
+ 0x7f),
+ (u8) key->u.tkip.iv16);
+
+ if (flags & IEEE80211_HW_TKIP_REQ_PHASE2_KEY)
+ ieee80211_tkip_gen_rc4key(key, hdr->addr2,
+ tx->u.tx.control->tkip_key);
+ else if (flags & IEEE80211_HW_TKIP_REQ_PHASE1_KEY) {
+ if (key->u.tkip.iv16 == 0 ||
+ !key->u.tkip.tx_initialized) {
+ ieee80211_tkip_gen_phase1key(key, hdr->addr2,
+ (u16 *)tx->u.tx.control->tkip_key);
+ key->u.tkip.tx_initialized = 1;
+ tx->u.tx.control->flags |=
+ IEEE80211_TXCTL_TKIP_NEW_PHASE1_KEY;
+ } else
+ tx->u.tx.control->flags &=
+ ~IEEE80211_TXCTL_TKIP_NEW_PHASE1_KEY;
+ }
+
+ tx->u.tx.control->key_idx = tx->key->hw_key_idx;
+ return 0;
+ }
+
+ /* Add room for ICV */
+ skb_put(skb, TKIP_ICV_LEN);
+
+ hdr = (struct ieee80211_hdr *) skb->data; /* reload: skb->data changed above */
+ ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm,
+ key, pos, len, hdr->addr2);
+ return 0;
+}
+
+
+/*
+ * TX handler: TKIP protection for the frame in @tx and its extra fragments.
+ *
+ * Frames without a TKIP key or without a data payload pass through
+ * untouched.  Otherwise the ICV/IV lengths are recorded in the TX control
+ * block.  If the key is not forced to software and the hardware does not
+ * set IEEE80211_HW_WEP_INCLUDE_IV, only the hardware key index is stored.
+ * In all remaining cases tkip_encrypt_skb() is run on tx->skb and then on
+ * every entry of tx->u.tx.extra_frag; any failure yields TXRX_DROP.
+ */
+ieee80211_txrx_result
+ieee80211_tx_h_tkip_encrypt(struct ieee80211_txrx_data *tx)
+{
+	struct sk_buff *skb = tx->skb;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_key *key = tx->key;
+	u16 fc = le16_to_cpu(hdr->frame_control);
+	int wpa_test = 0, test = 0;
+	int frag;
+
+	if (!key || key->alg != ALG_TKIP || !WLAN_FC_DATA_PRESENT(fc))
+		return TXRX_CONTINUE;
+
+	tx->u.tx.control->icv_len = TKIP_ICV_LEN;
+	tx->u.tx.control->iv_len = TKIP_IV_LEN;
+	ieee80211_tx_set_iswep(tx);
+
+	if (!tx->key->force_sw_encrypt && !wpa_test &&
+	    !(tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) {
+		/* hwaccel - with no need for preallocated room for IV/ICV */
+		tx->u.tx.control->key_idx = tx->key->hw_key_idx;
+		return TXRX_CONTINUE;
+	}
+
+	if (tkip_encrypt_skb(tx, skb, test) < 0)
+		return TXRX_DROP;
+
+	if (!tx->u.tx.extra_frag)
+		return TXRX_CONTINUE;
+
+	for (frag = 0; frag < tx->u.tx.num_extra_frag; frag++) {
+		if (tkip_encrypt_skb(tx, tx->u.tx.extra_frag[frag], test) < 0)
+			return TXRX_DROP;
+	}
+
+	return TXRX_CONTINUE;
+}
+
+
+/*
+ * RX handler: TKIP IV verification and decryption of a received data frame.
+ *
+ * Returns TXRX_CONTINUE for frames that are not TKIP-protected data frames,
+ * for frames the hardware already processed completely (decrypted, key not
+ * forced to software, IEEE80211_HW_WEP_INCLUDE_IV not set), and after a
+ * successful ieee80211_tkip_decrypt_data() once ICV and IV have been
+ * removed.  Returns TXRX_DROP when there is no station entry, the frame is
+ * too short, or ieee80211_tkip_decrypt_data() does not report
+ * TKIP_DECRYPT_OK.
+ */
+ieee80211_txrx_result
+ieee80211_rx_h_tkip_decrypt(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
+	u16 fc;
+	int hdrlen, res, hwaccel = 0, wpa_test = 0;
+	struct ieee80211_key *key = rx->key;
+	struct sk_buff *skb = rx->skb;
+
+	fc = le16_to_cpu(hdr->frame_control);
+	hdrlen = ieee80211_get_hdrlen(fc);
+
+	if (!rx->key || rx->key->alg != ALG_TKIP ||
+	    !(rx->fc & IEEE80211_FCTL_PROTECTED) ||
+	    (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
+		return TXRX_CONTINUE;
+
+	/*
+	 * Compare without subtracting: skb->len is unsigned, so
+	 * "skb->len - hdrlen" would wrap around for a frame shorter than its
+	 * header and slip past a "< 12" test.
+	 * NOTE(review): 12 is presumably TKIP_IV_LEN + TKIP_ICV_LEN - confirm.
+	 */
+	if (!rx->sta || skb->len < (unsigned int) hdrlen + 12)
+		return TXRX_DROP;
+
+	if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) &&
+	    !rx->key->force_sw_encrypt) {
+		if (!(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) {
+			/* Hardware takes care of all processing, including
+			 * replay protection, so no need to continue here. */
+			return TXRX_CONTINUE;
+		}
+
+		/* let TKIP code verify IV, but skip decryption */
+		hwaccel = 1;
+	}
+
+	res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
+					  key, skb->data + hdrlen,
+					  skb->len - hdrlen, rx->sta->addr,
+					  hwaccel, rx->u.rx.queue);
+	if (res != TKIP_DECRYPT_OK || wpa_test) {
+		printk(KERN_DEBUG "%s: TKIP decrypt failed for RX frame from "
+		       MAC_FMT " (res=%d)\n",
+		       rx->dev->name, MAC_ARG(rx->sta->addr), res);
+		return TXRX_DROP;
+	}
+
+	/* Trim ICV */
+	skb_trim(skb, skb->len - TKIP_ICV_LEN);
+
+	/* Remove IV: move the header up against the payload, drop the front */
+	memmove(skb->data + TKIP_IV_LEN, skb->data, hdrlen);
+	skb_pull(skb, TKIP_IV_LEN);
+
+	return TXRX_CONTINUE;
+}
+
+/*
+ * Build the two CCMP input blocks for the frame in @skb:
+ *  - @b_0, 16 bytes: flags byte 0x59, nonce (TID, addr2, @pn) and the
+ *    payload length in bytes 14-15;
+ *  - @aad, 32 bytes: a two-byte length followed by the masked 802.11
+ *    header, zero padded.
+ * @encrypted is non-zero when the frame still carries the MIC, in which
+ * case CCMP_MIC_LEN is also subtracted from the payload length.
+ */
+static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
+ int encrypted)
+{
+ u16 fc;
+ int a4_included, qos_included;
+ u8 qos_tid, *fc_pos, *data, *sa, *da;
+ int len_a;
+ size_t data_len;
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+
+ fc_pos = (u8 *) &hdr->frame_control;
+ fc = fc_pos[0] ^ (fc_pos[1] << 8); /* operands share no bits, so ^ acts as | */
+ a4_included = (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
+ (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS);
+
+ ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len); /* result ignored; only qos_tid and data_len are used */
+ data_len -= CCMP_HDR_LEN + (encrypted ? CCMP_MIC_LEN : 0);
+ if (qos_tid & 0x80) { /* 0x80 is the "QoS present" marker from get_hdr_info */
+ qos_included = 1;
+ qos_tid &= 0x0f;
+ } else
+ qos_included = 0;
+ /* First block, b_0 */
+
+ b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */
+ /* Nonce: QoS Priority | A2 | PN */
+ b_0[1] = qos_tid;
+ memcpy(&b_0[2], hdr->addr2, 6);
+ memcpy(&b_0[8], pn, CCMP_PN_LEN);
+ /* l(m) */
+ b_0[14] = (data_len >> 8) & 0xff;
+ b_0[15] = data_len & 0xff;
+
+
+ /* AAD (extra authenticate-only data) / masked 802.11 header
+ * FC | A1 | A2 | A3 | SC | [A4] | [QC] */
+
+ len_a = a4_included ? 28 : 22;
+ if (qos_included)
+ len_a += 2;
+
+ aad[0] = 0; /* (len_a >> 8) & 0xff; */
+ aad[1] = len_a & 0xff;
+ /* Mask FC: zero subtype b4 b5 b6 */
+ aad[2] = fc_pos[0] & ~(BIT(4) | BIT(5) | BIT(6));
+ /* Retry, PwrMgt, MoreData; set Protected */
+ aad[3] = (fc_pos[1] & ~(BIT(3) | BIT(4) | BIT(5))) | BIT(6);
+ memcpy(&aad[4], &hdr->addr1, 18); /* 18 bytes starting at addr1 */
+
+ /* Mask Seq#, leave Frag# */
+ aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f;
+ aad[23] = 0;
+ if (a4_included) {
+ memcpy(&aad[24], hdr->addr4, 6);
+ aad[30] = 0;
+ aad[31] = 0;
+ } else
+ memset(&aad[24], 0, 8);
+ if (qos_included) {
+ u8 *dpos = &aad[a4_included ? 30 : 24]; /* right behind A4 if present, else behind SC */
+
+ /* Mask QoS Control field */
+ dpos[0] = qos_tid;
+ dpos[1] = 0;
+ }
+}
+
+
+/*
+ * Write eight bytes at @hdr from the packet number @pn and @key_id:
+ * pn[5], pn[4], a zero byte, 0x20 | (key_id << 6), then pn[3] .. pn[0].
+ */
+static inline void ccmp_pn2hdr(u8 *hdr, u8 *pn, int key_id)
+{
+	u8 *out = hdr;
+
+	*out++ = pn[5];
+	*out++ = pn[4];
+	*out++ = 0;
+	*out++ = 0x20 | (key_id << 6);
+	*out++ = pn[3];
+	*out++ = pn[2];
+	*out++ = pn[1];
+	*out = pn[0];
+}
+
+
+/*
+ * Inverse of ccmp_pn2hdr(): collect the six packet number bytes from @hdr
+ * into @pn (pn[0] = hdr[7] ... pn[5] = hdr[0], skipping hdr[2] and hdr[3])
+ * and return the two key index bits stored in the top of hdr[3].
+ */
+static inline int ccmp_hdr2pn(u8 *pn, u8 *hdr)
+{
+	/* hdr offset feeding pn[0], pn[1], ... in that order */
+	static const u8 src_off[] = { 7, 6, 5, 4, 1, 0 };
+	unsigned int i;
+	int key_id;
+
+	for (i = 0; i < sizeof(src_off); i++)
+		pn[i] = hdr[src_off[i]];
+
+	key_id = (hdr[3] >> 6) & 0x03;
+	return key_id;
+}
+
+/*
+ * Make room for the CCMP header between the 802.11 header and the payload
+ * of @skb, increment the key's TX packet number and write it, together
+ * with the key index, into that header.
+ *
+ * When the key is not forced to software only the hardware key index is
+ * stored in the TX control block.  Otherwise CCMP_MIC_LEN bytes are appended
+ * and ieee80211_aes_ccm_encrypt() is run over the payload in place.
+ *
+ * Returns 0 on success, -1 if pskb_expand_head() fails.  @test is not used.
+ */
+static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx,
+ struct sk_buff *skb, int test)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_key *key = tx->key;
+ int hdrlen, len, tailneed;
+ u16 fc;
+ u8 *pos, *pn, *b_0, *aad, *scratch;
+ int i;
+
+ scratch = key->u.ccmp.tx_crypto_buf;
+ b_0 = scratch + 3 * AES_BLOCK_LEN; /* b_0 and aad live inside the key's TX scratch buffer */
+ aad = scratch + 4 * AES_BLOCK_LEN;
+
+ fc = le16_to_cpu(hdr->frame_control);
+ hdrlen = ieee80211_get_hdrlen(fc);
+ len = skb->len - hdrlen; /* payload length, taken before the header is added */
+
+ tailneed = !key->force_sw_encrypt ? 0 : CCMP_MIC_LEN;
+
+ if ((skb_headroom(skb) < CCMP_HDR_LEN ||
+ skb_tailroom(skb) < tailneed)) {
+ I802_DEBUG_INC(tx->local->tx_expand_skb_head);
+ if (unlikely(pskb_expand_head(skb, CCMP_HDR_LEN, tailneed,
+ GFP_ATOMIC)))
+ return -1;
+ }
+
+ pos = skb_push(skb, CCMP_HDR_LEN);
+ memmove(pos, pos + CCMP_HDR_LEN, hdrlen); /* slide the header to the new front */
+ hdr = (struct ieee80211_hdr *) pos;
+ pos += hdrlen; /* pos now addresses the gap left for the CCMP header */
+
+ /* PN = PN + 1 */
+ pn = key->u.ccmp.tx_pn;
+
+ for (i = CCMP_PN_LEN - 1; i >= 0; i--) {
+ pn[i]++; /* start at the last byte and carry toward pn[0] */
+ if (pn[i])
+ break;
+ }
+
+ ccmp_pn2hdr(pos, pn, key->keyidx);
+
+ if (!key->force_sw_encrypt) {
+ /* hwaccel - with preallocated room for CCMP header */
+ tx->u.tx.control->key_idx = key->hw_key_idx;
+ return 0;
+ }
+
+ pos += CCMP_HDR_LEN; /* pos now addresses the payload */
+ ccmp_special_blocks(skb, pn, b_0, aad, 0);
+ ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, b_0, aad, pos, len,
+ pos, skb_put(skb, CCMP_MIC_LEN));
+
+ return 0;
+}
+
+
+/*
+ * TX handler: CCMP protection for the frame in @tx and its extra fragments.
+ *
+ * Frames without a CCMP key or without a data payload pass through
+ * untouched.  Otherwise the MIC/header lengths are recorded in the TX
+ * control block.  If the key is not forced to software and the hardware
+ * does not set IEEE80211_HW_WEP_INCLUDE_IV, only the hardware key index is
+ * stored.  In all remaining cases ccmp_encrypt_skb() is run on tx->skb and
+ * then on every entry of tx->u.tx.extra_frag; any failure yields TXRX_DROP.
+ */
+ieee80211_txrx_result
+ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx)
+{
+	struct sk_buff *skb = tx->skb;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_key *key = tx->key;
+	u16 fc = le16_to_cpu(hdr->frame_control);
+	int test = 0;
+	int frag;
+
+	if (!key || key->alg != ALG_CCMP || !WLAN_FC_DATA_PRESENT(fc))
+		return TXRX_CONTINUE;
+
+	tx->u.tx.control->icv_len = CCMP_MIC_LEN;
+	tx->u.tx.control->iv_len = CCMP_HDR_LEN;
+	ieee80211_tx_set_iswep(tx);
+
+	if (!tx->key->force_sw_encrypt &&
+	    !(tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) {
+		/* hwaccel - with no need for preallocated room for CCMP
+		 * header or MIC fields */
+		tx->u.tx.control->key_idx = tx->key->hw_key_idx;
+		return TXRX_CONTINUE;
+	}
+
+	if (ccmp_encrypt_skb(tx, skb, test) < 0)
+		return TXRX_DROP;
+
+	if (!tx->u.tx.extra_frag)
+		return TXRX_CONTINUE;
+
+	for (frag = 0; frag < tx->u.tx.num_extra_frag; frag++) {
+		if (ccmp_encrypt_skb(tx, tx->u.tx.extra_frag[frag], test) < 0)
+			return TXRX_DROP;
+	}
+
+	return TXRX_CONTINUE;
+}
+
+/*
+ * RX handler: CCMP replay check and decryption of a received data frame.
+ *
+ * Returns TXRX_CONTINUE for frames that are not CCMP-protected data frames,
+ * for frames the hardware processed completely (decrypted, key not forced
+ * to software, IEEE80211_HW_WEP_INCLUDE_IV not set), and after a frame has
+ * been accepted and its CCMP header and MIC removed.  Returns TXRX_DROP
+ * when there is no station entry, the frame is too short, the packet number
+ * is not greater than the one stored for the RX queue, or
+ * ieee80211_aes_ccm_decrypt() reports a failure.
+ */
+ieee80211_txrx_result
+ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
+ u16 fc;
+ int hdrlen;
+ struct ieee80211_key *key = rx->key;
+ struct sk_buff *skb = rx->skb;
+ u8 pn[CCMP_PN_LEN];
+ int data_len;
+
+ fc = le16_to_cpu(hdr->frame_control);
+ hdrlen = ieee80211_get_hdrlen(fc);
+
+ if (!key || key->alg != ALG_CCMP ||
+ !(rx->fc & IEEE80211_FCTL_PROTECTED) ||
+ (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
+ return TXRX_CONTINUE;
+
+ data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; /* signed, so a short frame shows up as negative */
+ if (!rx->sta || data_len < 0)
+ return TXRX_DROP;
+
+ if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) &&
+ !key->force_sw_encrypt &&
+ !(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV))
+ return TXRX_CONTINUE;
+
+ (void) ccmp_hdr2pn(pn, skb->data + hdrlen); /* key index from the header is not used here */
+
+ if (memcmp(pn, key->u.ccmp.rx_pn[rx->u.rx.queue], CCMP_PN_LEN) <= 0) {
+#ifdef CONFIG_MAC80211_DEBUG
+ u8 *ppn = key->u.ccmp.rx_pn[rx->u.rx.queue];
+ printk(KERN_DEBUG "%s: CCMP replay detected for RX frame from "
+ MAC_FMT " (RX PN %02x%02x%02x%02x%02x%02x <= prev. PN "
+ "%02x%02x%02x%02x%02x%02x)\n", rx->dev->name,
+ MAC_ARG(rx->sta->addr),
+ pn[0], pn[1], pn[2], pn[3], pn[4], pn[5],
+ ppn[0], ppn[1], ppn[2], ppn[3], ppn[4], ppn[5]);
+#endif /* CONFIG_MAC80211_DEBUG */
+ key->u.ccmp.replays++; /* PN did not advance past the stored one for this queue */
+ return TXRX_DROP;
+ }
+
+ if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) &&
+ !key->force_sw_encrypt) {
+ /* hwaccel has already decrypted frame and verified MIC */
+ } else {
+ u8 *scratch, *b_0, *aad;
+
+ scratch = key->u.ccmp.rx_crypto_buf;
+ b_0 = scratch + 3 * AES_BLOCK_LEN; /* b_0 and aad live inside the key's RX scratch buffer */
+ aad = scratch + 4 * AES_BLOCK_LEN;
+
+ ccmp_special_blocks(skb, pn, b_0, aad, 1);
+
+ if (ieee80211_aes_ccm_decrypt(
+ key->u.ccmp.tfm, scratch, b_0, aad,
+ skb->data + hdrlen + CCMP_HDR_LEN, data_len,
+ skb->data + skb->len - CCMP_MIC_LEN,
+ skb->data + hdrlen + CCMP_HDR_LEN)) {
+ printk(KERN_DEBUG "%s: CCMP decrypt failed for RX "
+ "frame from " MAC_FMT "\n", rx->dev->name,
+ MAC_ARG(rx->sta->addr));
+ return TXRX_DROP;
+ }
+ }
+
+ memcpy(key->u.ccmp.rx_pn[rx->u.rx.queue], pn, CCMP_PN_LEN); /* remember the accepted PN */
+
+ /* Remove CCMP header and MIC */
+ skb_trim(skb, skb->len - CCMP_MIC_LEN);
+ memmove(skb->data + CCMP_HDR_LEN, skb->data, hdrlen); /* move the header up against the payload */
+ skb_pull(skb, CCMP_HDR_LEN);
+
+ return TXRX_CONTINUE;
+}
+
diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h
new file mode 100644
index 000000000000..da3b9594f9c3
--- /dev/null
+++ b/net/mac80211/wpa.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2002-2004, Instant802 Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef WPA_H
+#define WPA_H
+
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include "ieee80211_i.h"
+/*
+ * TX/RX handler pairs implemented in wpa.c: Michael MIC add/verify,
+ * TKIP encrypt/decrypt and CCMP encrypt/decrypt.  Each takes the
+ * per-frame ieee80211_txrx_data and returns an ieee80211_txrx_result.
+ */
+ieee80211_txrx_result
+ieee80211_tx_h_michael_mic_add(struct ieee80211_txrx_data *tx);
+ieee80211_txrx_result
+ieee80211_rx_h_michael_mic_verify(struct ieee80211_txrx_data *rx);
+
+ieee80211_txrx_result
+ieee80211_tx_h_tkip_encrypt(struct ieee80211_txrx_data *tx);
+ieee80211_txrx_result
+ieee80211_rx_h_tkip_decrypt(struct ieee80211_txrx_data *rx);
+
+ieee80211_txrx_result
+ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx);
+ieee80211_txrx_result
+ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx);
+
+#endif /* WPA_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 253fce3ad2d3..ea6211cade0a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -25,6 +25,7 @@ config NETFILTER_NETLINK_LOG
and is also scheduled to replace the old syslog-based ipt_LOG
and ip6t_LOG modules.
+# Rename this to NF_CONNTRACK in 2.6.25
config NF_CONNTRACK_ENABLED
tristate "Netfilter connection tracking support"
help
@@ -39,42 +40,9 @@ config NF_CONNTRACK_ENABLED
To compile it as a module, choose M here. If unsure, say N.
-choice
- prompt "Netfilter connection tracking support"
- depends on NF_CONNTRACK_ENABLED
-
-config NF_CONNTRACK_SUPPORT
- bool "Layer 3 Independent Connection tracking"
- help
- Layer 3 independent connection tracking is experimental scheme
- which generalize ip_conntrack to support other layer 3 protocols.
-
- This is required to do Masquerading or other kinds of Network
- Address Translation (except for Fast NAT). It can also be used to
- enhance packet filtering (see `Connection state match support'
- below).
-
-config IP_NF_CONNTRACK_SUPPORT
- bool "Layer 3 Dependent Connection tracking (OBSOLETE)"
- help
- The old, Layer 3 dependent ip_conntrack subsystem of netfilter.
-
- This is required to do Masquerading or other kinds of Network
- Address Translation (except for Fast NAT). It can also be used to
- enhance packet filtering (see `Connection state match support'
- below).
-
-endchoice
-
config NF_CONNTRACK
tristate
- default m if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
- default y if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
-
-config IP_NF_CONNTRACK
- tristate
- default m if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
- default y if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
+ default NF_CONNTRACK_ENABLED
config NF_CT_ACCT
bool "Connection tracking flow accounting"
@@ -132,7 +100,7 @@ config NF_CT_PROTO_SCTP
tracking code will be able to do state tracking on SCTP connections.
If you want to compile it as a module, say M here and read
- Documentation/modules.txt. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NF_CONNTRACK_AMANDA
tristate "Amanda backup protocol support"
@@ -275,6 +243,7 @@ config NF_CT_NETLINK
tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK
depends on NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
+ depends on NF_NAT=n || NF_NAT
help
This option enables support for a netlink-based userspace interface
@@ -302,17 +271,16 @@ config NETFILTER_XT_TARGET_CONNMARK
tristate '"CONNMARK" target support'
depends on NETFILTER_XTABLES
depends on IP_NF_MANGLE || IP6_NF_MANGLE
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
- select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
- select NF_CONNTRACK_MARK if NF_CONNTRACK
+ depends on NF_CONNTRACK
+ select NF_CONNTRACK_MARK
help
This option adds a `CONNMARK' target, which allows one to manipulate
the connection mark value. Similar to the MARK target, but
affects the connection mark value rather than the packet mark value.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. The module will be called
- ipt_CONNMARK.o. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. The module will be called
+ ipt_CONNMARK.ko. If unsure, say `N'.
config NETFILTER_XT_TARGET_DSCP
tristate '"DSCP" target support'
@@ -365,7 +333,7 @@ config NETFILTER_XT_TARGET_NOTRACK
tristate '"NOTRACK" target support'
depends on NETFILTER_XTABLES
depends on IP_NF_RAW || IP6_NF_RAW
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
+ depends on NF_CONNTRACK
help
The NOTRACK target allows a select rule to specify
which packets *not* to enter the conntrack/NAT
@@ -373,7 +341,7 @@ config NETFILTER_XT_TARGET_NOTRACK
no protocol helpers for the selected packets).
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_TARGET_SECMARK
tristate '"SECMARK" target support'
@@ -386,9 +354,7 @@ config NETFILTER_XT_TARGET_SECMARK
config NETFILTER_XT_TARGET_CONNSECMARK
tristate '"CONNSECMARK" target support'
- depends on NETFILTER_XTABLES && \
- ((NF_CONNTRACK && NF_CONNTRACK_SECMARK) || \
- (IP_NF_CONNTRACK && IP_NF_CONNTRACK_SECMARK))
+ depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
help
The CONNSECMARK target copies security markings from packets
to connections, and restores security markings from connections
@@ -431,39 +397,37 @@ config NETFILTER_XT_MATCH_COMMENT
comments in your iptables ruleset.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_MATCH_CONNBYTES
tristate '"connbytes" per-connection counter match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
- select IP_NF_CT_ACCT if IP_NF_CONNTRACK
- select NF_CT_ACCT if NF_CONNTRACK
+ depends on NF_CONNTRACK
+ select NF_CT_ACCT
help
This option adds a `connbytes' match, which allows you to match the
number of bytes and/or packets for each direction within a connection.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_MATCH_CONNMARK
tristate '"connmark" connection mark match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
- select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
- select NF_CONNTRACK_MARK if NF_CONNTRACK
+ depends on NF_CONNTRACK
+ select NF_CONNTRACK_MARK
help
This option adds a `connmark' match, which allows you to match the
connection mark value previously set for the session by `CONNMARK'.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. The module will be called
- ipt_connmark.o. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. The module will be called
+ ipt_connmark.ko. If unsure, say `N'.
config NETFILTER_XT_MATCH_CONNTRACK
tristate '"conntrack" connection tracking match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
+ depends on NF_CONNTRACK
help
This is a general conntrack match module, a superset of the state match.
@@ -482,7 +446,7 @@ config NETFILTER_XT_MATCH_DCCP
and DCCP flags.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_MATCH_DSCP
tristate '"DSCP" match support'
@@ -507,7 +471,7 @@ config NETFILTER_XT_MATCH_ESP
config NETFILTER_XT_MATCH_HELPER
tristate '"helper" match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
+ depends on NF_CONNTRACK
help
Helper matching allows you to match packets in dynamic connections
tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -601,7 +565,7 @@ config NETFILTER_XT_MATCH_QUOTA
byte counter.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_MATCH_REALM
tristate '"realm" match support'
@@ -615,7 +579,7 @@ config NETFILTER_XT_MATCH_REALM
in tc world.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_MATCH_SCTP
tristate '"sctp" protocol match support (EXPERIMENTAL)'
@@ -626,12 +590,12 @@ config NETFILTER_XT_MATCH_SCTP
and SCTP chunk types.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_MATCH_STATE
tristate '"state" match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
+ depends on NF_CONNTRACK
help
Connection state matching allows you to match packets based on their
relationship to a tracked connection (ie. previous packets). This
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index c3ebdbd917e9..a84478ee2ded 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -5,10 +5,6 @@
* way.
*
* Rusty Russell (C)2000 -- This code is GPL.
- *
- * February 2000: Modified by James Morris to have 1 queue per protocol.
- * 15-Mar-2000: Added NF_REPEAT --RR.
- * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
*/
#include <linux/kernel.h>
#include <linux/netfilter.h>
@@ -244,6 +240,7 @@ void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(nf_proto_csum_replace4);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
@@ -264,6 +261,22 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
}
EXPORT_SYMBOL(nf_ct_attach);
+void (*nf_ct_destroy)(struct nf_conntrack *);
+EXPORT_SYMBOL(nf_ct_destroy);
+
+/*
+ * Destroy @nfct through the handler currently published in nf_ct_destroy.
+ * The pointer is fetched and called inside one RCU read-side section;
+ * finding it NULL is treated as a bug (BUG_ON).
+ */
+void nf_conntrack_destroy(struct nf_conntrack *nfct)
+{
+	void (*handler)(struct nf_conntrack *);
+
+	rcu_read_lock();
+	handler = rcu_dereference(nf_ct_destroy);
+	BUG_ON(!handler);
+	handler(nfct);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_conntrack_destroy);
+#endif /* CONFIG_NF_CONNTRACK */
+
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_net_netfilter;
EXPORT_SYMBOL(proc_net_netfilter);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b3a70eb6d42a..e132c8ae8784 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -9,24 +9,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * - new API and handling of conntrack/nat helpers
- * - now capable of multiple expectations for one master
- * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
- * - add usage/reference counts to ip_conntrack_expect
- * - export ip_conntrack[_expect]_{find_get,put} functions
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - generalize L3 protocol denendent part.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - add support various size of conntrack structures.
- * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
- * - restructure nf_conn (introduce nf_conn_help)
- * - redesign 'features' how they were originally intended
- * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
- * - add support for L3 protocol module load on demand.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_core.c
*/
#include <linux/types.h>
@@ -128,10 +110,11 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
unsigned int size, unsigned int rnd)
{
unsigned int a, b;
- a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
- ((tuple->src.l3num) << 16) | tuple->dst.protonum);
- b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
- (tuple->src.u.all << 16) | tuple->dst.u.all);
+
+ a = jhash2(tuple->src.u3.all, ARRAY_SIZE(tuple->src.u3.all),
+ (tuple->src.l3num << 16) | tuple->dst.protonum);
+ b = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+ (tuple->src.u.all << 16) | tuple->dst.u.all);
return jhash_2words(a, b, rnd) % size;
}
@@ -633,13 +616,11 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
memset(conntrack, 0, nf_ct_cache[features].size);
conntrack->features = features;
atomic_set(&conntrack->ct_general.use, 1);
- conntrack->ct_general.destroy = destroy_conntrack;
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
/* Don't set timer yet: wait for confirmation */
- init_timer(&conntrack->timeout);
- conntrack->timeout.data = (unsigned long)conntrack;
- conntrack->timeout.function = death_by_timeout;
+ setup_timer(&conntrack->timeout, death_by_timeout,
+ (unsigned long)conntrack);
read_unlock_bh(&nf_ct_cache_lock);
return conntrack;
@@ -768,7 +749,7 @@ resolve_normal_ct(struct sk_buff *skb,
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
+ if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, &tuple, l3proto,
l4proto)) {
DEBUGP("resolve_normal_ct: Can't get tuple\n");
@@ -960,7 +941,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
if (do_acct) {
ct->counters[CTINFO2DIR(ctinfo)].packets++;
ct->counters[CTINFO2DIR(ctinfo)].bytes +=
- skb->len - (unsigned int)(skb->nh.raw - skb->data);
+ skb->len - skb_network_offset(skb);
if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
|| (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
@@ -1140,6 +1121,8 @@ void nf_conntrack_cleanup(void)
while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
schedule();
+ rcu_assign_pointer(nf_ct_destroy, NULL);
+
for (i = 0; i < NF_CT_F_NUM; i++) {
if (nf_ct_cache[i].use == 0)
continue;
@@ -1152,14 +1135,7 @@ void nf_conntrack_cleanup(void)
free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
nf_conntrack_htable_size);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_generic);
-
- /* free l3proto protocol tables */
- for (i = 0; i < PF_MAX; i++)
- if (nf_ct_protos[i]) {
- kfree(nf_ct_protos[i]);
- nf_ct_protos[i] = NULL;
- }
+ nf_conntrack_proto_fini();
}
static struct list_head *alloc_hashtable(int size, int *vmalloced)
@@ -1237,7 +1213,6 @@ module_param_call(hashsize, set_hashsize, param_get_uint,
int __init nf_conntrack_init(void)
{
- unsigned int i;
int ret;
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
@@ -1279,18 +1254,13 @@ int __init nf_conntrack_init(void)
goto err_free_conntrack_slab;
}
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_generic);
+ ret = nf_conntrack_proto_init();
if (ret < 0)
goto out_free_expect_slab;
- /* Don't NEED lock here, but good form anyway. */
- write_lock_bh(&nf_conntrack_lock);
- for (i = 0; i < AF_MAX; i++)
- nf_ct_l3protos[i] = &nf_conntrack_l3proto_generic;
- write_unlock_bh(&nf_conntrack_lock);
-
/* For use by REJECT target */
rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
+ rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
/* Set up fake conntrack:
- to never be deleted, not in any hashes */
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 1a223e0c0856..6bd421df2dbc 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -91,3 +91,26 @@ void nf_ct_event_cache_flush(void)
}
}
+int nf_conntrack_register_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+
+int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+
+int nf_conntrack_expect_register_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_register_notifier);
+
+int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ce70a6fc6bda..c31af29a4439 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -290,9 +290,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
master_help->expecting++;
list_add(&exp->list, &nf_conntrack_expect_list);
- init_timer(&exp->timeout);
- exp->timeout.data = (unsigned long)exp;
- exp->timeout.function = expectation_timed_out;
+ setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp);
exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
add_timer(&exp->timeout);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 3089dfc40c88..a186799f6542 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -7,12 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with Layer 3 protocol independent connection tracking.
- * - track EPRT and EPSV commands with IPv6 address.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c
*/
#include <linux/module.h>
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index bb26a658cc1c..1093478cc007 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -46,7 +46,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
struct nf_conntrack_expect *exp;
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
struct rtable *rt = (struct rtable *)(*pskb)->dst;
struct in_device *in_dev;
__be32 mask = 0;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 48f05314ebf7..aa1a97ee514b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -6,9 +6,6 @@
* (C) 2003 by Patrick Mchardy <kaber@trash.net>
* (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
*
- * I've reworked this stuff to use attributes instead of conntrack
- * structures. 5.44 am. I need more tea. --pablo 05/07/11.
- *
* Initial connection tracking via netlink development funded and
* generally made possible by Network Robots, Inc. (www.networkrobots.com)
*
@@ -16,8 +13,6 @@
*
* This software may be used and distributed according to the terms
* of the GNU General Public License, incorporated herein by reference.
- *
- * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14)
*/
#include <linux/init.h>
@@ -33,6 +28,7 @@
#include <linux/notifier.h>
#include <linux/netfilter.h>
+#include <net/netlink.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
@@ -268,9 +264,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nfattr *nest_parms;
- unsigned char *b;
-
- b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
event |= NFNL_SUBSYS_CTNETLINK << 8;
nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -303,12 +297,12 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
ctnetlink_dump_use(skb, ct) < 0)
goto nfattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
nfattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -322,7 +316,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
struct nf_conn *ct = (struct nf_conn *)ptr;
struct sk_buff *skb;
unsigned int type;
- unsigned char *b;
+ sk_buff_data_t b;
unsigned int flags = 0, group;
/* ignore our fake conntrack entry */
@@ -662,7 +656,7 @@ static const size_t cta_min[CTA_MAX] = {
static int
ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
@@ -710,7 +704,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
static int
ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
@@ -721,22 +715,12 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
int err = 0;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
- u32 rlen;
-
#ifndef CONFIG_NF_CT_ACCT
if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
return -ENOTSUPP;
#endif
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
-
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
+ return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
+ ctnetlink_done);
}
if (nfattr_bad_size(cda, CTA_MAX, cta_min))
@@ -1010,7 +994,7 @@ err:
static int
ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
@@ -1152,9 +1136,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned char *b;
-
- b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -1168,12 +1150,12 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nfattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
nfattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1186,7 +1168,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
struct sk_buff *skb;
unsigned int type;
- unsigned char *b;
+ sk_buff_data_t b;
int flags = 0;
if (events & IPEXP_NEW) {
@@ -1263,7 +1245,7 @@ static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
@@ -1276,17 +1258,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
return -EINVAL;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
- u32 rlen;
-
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_exp_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
+ return netlink_dump_start(ctnl, skb, nlh,
+ ctnetlink_exp_dump_table,
+ ctnetlink_done);
}
if (cda[CTA_EXPECT_MASTER-1])
@@ -1333,7 +1307,7 @@ out:
static int
ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_expect *exp, *tmp;
struct nf_conntrack_tuple tuple;
@@ -1467,7 +1441,7 @@ out:
static int
ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 456155f05c75..6d947068c58f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,13 +28,13 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
-struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
+static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_l3protos);
-#ifdef CONFIG_SYSCTL
-static DEFINE_MUTEX(nf_ct_proto_sysctl_mutex);
+static DEFINE_MUTEX(nf_ct_proto_mutex);
+#ifdef CONFIG_SYSCTL
static int
nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path,
struct ctl_table *table, unsigned int *users)
@@ -164,13 +164,11 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
int err = 0;
#ifdef CONFIG_SYSCTL
- mutex_lock(&nf_ct_proto_sysctl_mutex);
if (l3proto->ctl_table != NULL) {
err = nf_ct_register_sysctl(&l3proto->ctl_table_header,
l3proto->ctl_table_path,
l3proto->ctl_table, NULL);
}
- mutex_unlock(&nf_ct_proto_sysctl_mutex);
#endif
return err;
}
@@ -178,11 +176,9 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto)
{
#ifdef CONFIG_SYSCTL
- mutex_lock(&nf_ct_proto_sysctl_mutex);
if (l3proto->ctl_table_header != NULL)
nf_ct_unregister_sysctl(&l3proto->ctl_table_header,
l3proto->ctl_table, NULL);
- mutex_unlock(&nf_ct_proto_sysctl_mutex);
#endif
}
@@ -190,27 +186,23 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
- if (proto->l3proto >= AF_MAX) {
- ret = -EBUSY;
- goto out;
- }
+ if (proto->l3proto >= AF_MAX)
+ return -EBUSY;
- write_lock_bh(&nf_conntrack_lock);
+ mutex_lock(&nf_ct_proto_mutex);
if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
ret = -EBUSY;
goto out_unlock;
}
- rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
- write_unlock_bh(&nf_conntrack_lock);
ret = nf_ct_l3proto_register_sysctl(proto);
if (ret < 0)
- nf_conntrack_l3proto_unregister(proto);
- return ret;
+ goto out_unlock;
+
+ rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
out_unlock:
- write_unlock_bh(&nf_conntrack_lock);
-out:
+ mutex_unlock(&nf_ct_proto_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
@@ -219,14 +211,14 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
{
BUG_ON(proto->l3proto >= AF_MAX);
- write_lock_bh(&nf_conntrack_lock);
+ mutex_lock(&nf_ct_proto_mutex);
BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
&nf_conntrack_l3proto_generic);
- write_unlock_bh(&nf_conntrack_lock);
- synchronize_rcu();
-
nf_ct_l3proto_unregister_sysctl(proto);
+ mutex_unlock(&nf_ct_proto_mutex);
+
+ synchronize_rcu();
/* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(kill_l3proto, proto);
@@ -238,7 +230,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
int err = 0;
#ifdef CONFIG_SYSCTL
- mutex_lock(&nf_ct_proto_sysctl_mutex);
if (l4proto->ctl_table != NULL) {
err = nf_ct_register_sysctl(l4proto->ctl_table_header,
nf_net_netfilter_sysctl_path,
@@ -260,7 +251,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
}
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
out:
- mutex_unlock(&nf_ct_proto_sysctl_mutex);
#endif /* CONFIG_SYSCTL */
return err;
}
@@ -268,7 +258,6 @@ out:
static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
- mutex_lock(&nf_ct_proto_sysctl_mutex);
if (l4proto->ctl_table_header != NULL &&
*l4proto->ctl_table_header != NULL)
nf_ct_unregister_sysctl(l4proto->ctl_table_header,
@@ -279,7 +268,6 @@ static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto
nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header,
l4proto->ctl_compat_table, NULL);
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
- mutex_unlock(&nf_ct_proto_sysctl_mutex);
#endif /* CONFIG_SYSCTL */
}
@@ -289,68 +277,41 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
- if (l4proto->l3proto >= PF_MAX) {
- ret = -EBUSY;
- goto out;
- }
-
- if (l4proto == &nf_conntrack_l4proto_generic)
- return nf_ct_l4proto_register_sysctl(l4proto);
+ if (l4proto->l3proto >= PF_MAX)
+ return -EBUSY;
-retry:
- write_lock_bh(&nf_conntrack_lock);
- if (nf_ct_protos[l4proto->l3proto]) {
- if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
- != &nf_conntrack_l4proto_generic) {
- ret = -EBUSY;
- goto out_unlock;
- }
- } else {
+ mutex_lock(&nf_ct_proto_mutex);
+ if (!nf_ct_protos[l4proto->l3proto]) {
/* l3proto may be loaded latter. */
struct nf_conntrack_l4proto **proto_array;
int i;
- write_unlock_bh(&nf_conntrack_lock);
-
- proto_array = (struct nf_conntrack_l4proto **)
- kmalloc(MAX_NF_CT_PROTO *
- sizeof(struct nf_conntrack_l4proto *),
- GFP_KERNEL);
+ proto_array = kmalloc(MAX_NF_CT_PROTO *
+ sizeof(struct nf_conntrack_l4proto *),
+ GFP_KERNEL);
if (proto_array == NULL) {
ret = -ENOMEM;
- goto out;
+ goto out_unlock;
}
+
for (i = 0; i < MAX_NF_CT_PROTO; i++)
proto_array[i] = &nf_conntrack_l4proto_generic;
-
- write_lock_bh(&nf_conntrack_lock);
- if (nf_ct_protos[l4proto->l3proto]) {
- /* bad timing, but no problem */
- write_unlock_bh(&nf_conntrack_lock);
- kfree(proto_array);
- } else {
- nf_ct_protos[l4proto->l3proto] = proto_array;
- write_unlock_bh(&nf_conntrack_lock);
- }
-
- /*
- * Just once because array is never freed until unloading
- * nf_conntrack.ko
- */
- goto retry;
+ nf_ct_protos[l4proto->l3proto] = proto_array;
+ } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
+ &nf_conntrack_l4proto_generic) {
+ ret = -EBUSY;
+ goto out_unlock;
}
- rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
- write_unlock_bh(&nf_conntrack_lock);
-
ret = nf_ct_l4proto_register_sysctl(l4proto);
if (ret < 0)
- nf_conntrack_l4proto_unregister(l4proto);
- return ret;
+ goto out_unlock;
+
+ rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+ l4proto);
out_unlock:
- write_unlock_bh(&nf_conntrack_lock);
-out:
+ mutex_unlock(&nf_ct_proto_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
@@ -359,21 +320,42 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
{
BUG_ON(l4proto->l3proto >= PF_MAX);
- if (l4proto == &nf_conntrack_l4proto_generic) {
- nf_ct_l4proto_unregister_sysctl(l4proto);
- return;
- }
-
- write_lock_bh(&nf_conntrack_lock);
+ mutex_lock(&nf_ct_proto_mutex);
BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
&nf_conntrack_l4proto_generic);
- write_unlock_bh(&nf_conntrack_lock);
- synchronize_rcu();
-
nf_ct_l4proto_unregister_sysctl(l4proto);
+ mutex_unlock(&nf_ct_proto_mutex);
+
+ synchronize_rcu();
/* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(kill_l4proto, l4proto);
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
+
+int nf_conntrack_proto_init(void)
+{
+ unsigned int i;
+ int err;
+
+ err = nf_ct_l4proto_register_sysctl(&nf_conntrack_l4proto_generic);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i < AF_MAX; i++)
+ rcu_assign_pointer(nf_ct_l3protos[i],
+ &nf_conntrack_l3proto_generic);
+ return 0;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+ unsigned int i;
+
+ nf_ct_l4proto_unregister_sysctl(&nf_conntrack_l4proto_generic);
+
+ /* free l3proto protocol tables */
+ for (i = 0; i < PF_MAX; i++)
+ kfree(nf_ct_protos[i]);
+}
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 7c069939695a..6faf1bed7224 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -4,11 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with L3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c
*/
#include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3c80558716a0..0d3254b974c5 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -7,15 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with L3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/ip_conntrack_sctp.c
- */
-
-/*
- * Added support for proc manipulation of timeouts.
*/
#include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 153d6619993a..ccdd5d231e0d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -4,24 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- * - Real stateful connection tracking
- * - Modified state transitions table
- * - Window scaling support added
- * - SACK support added
- *
- * Willy Tarreau:
- * - State table bugfixes
- * - More robust state changes
- * - Tuning timer parameters
- *
- * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - genelized Layer 3 protocol part.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c
- *
- * version 2.2
*/
#include <linux/types.h>
@@ -470,11 +452,10 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
/* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED*4
- && *(__be32 *)ptr ==
- __constant_htonl((TCPOPT_NOP << 24)
- | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8)
- | TCPOLEN_TIMESTAMP))
+ && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
+ | (TCPOPT_NOP << 16)
+ | (TCPOPT_TIMESTAMP << 8)
+ | TCPOLEN_TIMESTAMP))
return;
while (length > 0) {
@@ -765,26 +746,18 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
#define TH_ECE 0x40
#define TH_CWR 0x80
-/* table of valid flag combinations - ECE and CWR are always valid */
-static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
{
[TH_SYN] = 1,
- [TH_SYN|TH_PUSH] = 1,
[TH_SYN|TH_URG] = 1,
- [TH_SYN|TH_PUSH|TH_URG] = 1,
[TH_SYN|TH_ACK] = 1,
- [TH_SYN|TH_ACK|TH_PUSH] = 1,
[TH_RST] = 1,
[TH_RST|TH_ACK] = 1,
- [TH_RST|TH_ACK|TH_PUSH] = 1,
[TH_FIN|TH_ACK] = 1,
+ [TH_FIN|TH_ACK|TH_URG] = 1,
[TH_ACK] = 1,
- [TH_ACK|TH_PUSH] = 1,
[TH_ACK|TH_URG] = 1,
- [TH_ACK|TH_URG|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_URG] = 1,
- [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
};
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
@@ -831,7 +804,7 @@ static int tcp_error(struct sk_buff *skb,
}
/* Check TCP flags. */
- tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
+ tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
if (!tcp_valid_flags[tcpflags]) {
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
@@ -1110,11 +1083,26 @@ static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
const struct nf_conn *ct)
{
struct nfattr *nest_parms;
+ struct nf_ct_tcp_flags tmp = {};
read_lock_bh(&tcp_lock);
nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
&ct->proto.tcp.state);
+
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
+ &ct->proto.tcp.seen[0].td_scale);
+
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
+ &ct->proto.tcp.seen[1].td_scale);
+
+ tmp.flags = ct->proto.tcp.seen[0].flags;
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+ sizeof(struct nf_ct_tcp_flags), &tmp);
+
+ tmp.flags = ct->proto.tcp.seen[1].flags;
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
+ sizeof(struct nf_ct_tcp_flags), &tmp);
read_unlock_bh(&tcp_lock);
NFA_NEST_END(skb, nest_parms);
@@ -1127,7 +1115,11 @@ nfattr_failure:
}
static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
- [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_WSCALE_REPLY-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1] = sizeof(struct nf_ct_tcp_flags),
+ [CTA_PROTOINFO_TCP_FLAGS_REPLY-1] = sizeof(struct nf_ct_tcp_flags)
};
static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
@@ -1151,6 +1143,30 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
write_lock_bh(&tcp_lock);
ct->proto.tcp.state =
*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+
+ if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) {
+ struct nf_ct_tcp_flags *attr =
+ NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]);
+ ct->proto.tcp.seen[0].flags &= ~attr->mask;
+ ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
+ }
+
+ if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) {
+ struct nf_ct_tcp_flags *attr =
+ NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]);
+ ct->proto.tcp.seen[1].flags &= ~attr->mask;
+ ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
+ }
+
+ if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] &&
+ tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] &&
+ ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
+ ct->proto.tcp.seen[0].td_scale = *(u_int8_t *)
+ NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
+ ct->proto.tcp.seen[1].td_scale = *(u_int8_t *)
+ NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
+ }
write_unlock_bh(&tcp_lock);
return 0;
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a5e5726ec0c7..3620ecc095fd 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -4,11 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c
*/
#include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b8586360e519..45baeb0e30f9 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,20 +1,9 @@
-/* This file contains all the functions required for the standalone
- nf_conntrack module.
-
- These are not required by the compatibility layer.
-*/
-
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - generalize L3 protocol dependent part.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
*/
#include <linux/types.h>
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index bf23e489e4cd..8797e6953ef2 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -3,7 +3,7 @@
*
* (C) 2001 by Jay Schulist <jschlst@samba.org>,
* (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
+ * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org>
*
* Initial netfilter messages via netlink development funded and
* generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -28,10 +28,9 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <net/sock.h>
+#include <net/netlink.h>
#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/netfilter.h>
#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink.h>
@@ -41,32 +40,34 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
static char __initdata nfversion[] = "0.30";
-#if 0
-#define DEBUGP(format, args...) \
- printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \
- __LINE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
static struct sock *nfnl = NULL;
static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
-DECLARE_MUTEX(nfnl_sem);
+static DEFINE_MUTEX(nfnl_mutex);
-void nfnl_lock(void)
+static void nfnl_lock(void)
{
- nfnl_shlock();
+ mutex_lock(&nfnl_mutex);
}
-void nfnl_unlock(void)
+static int nfnl_trylock(void)
{
- nfnl_shunlock();
+ return !mutex_trylock(&nfnl_mutex);
}
-int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
+static void __nfnl_unlock(void)
{
- DEBUGP("registering subsystem ID %u\n", n->subsys_id);
+ mutex_unlock(&nfnl_mutex);
+}
+
+static void nfnl_unlock(void)
+{
+ mutex_unlock(&nfnl_mutex);
+ if (nfnl->sk_receive_queue.qlen)
+ nfnl->sk_data_ready(nfnl, 0);
+}
+int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
+{
nfnl_lock();
if (subsys_table[n->subsys_id]) {
nfnl_unlock();
@@ -77,24 +78,23 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
return 0;
}
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n)
{
- DEBUGP("unregistering subsystem ID %u\n", n->subsys_id);
-
nfnl_lock();
subsys_table[n->subsys_id] = NULL;
nfnl_unlock();
return 0;
}
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
{
u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
- if (subsys_id >= NFNL_SUBSYS_COUNT
- || subsys_table[subsys_id] == NULL)
+ if (subsys_id >= NFNL_SUBSYS_COUNT)
return NULL;
return subsys_table[subsys_id];
@@ -105,10 +105,8 @@ nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss)
{
u_int8_t cb_id = NFNL_MSG_TYPE(type);
- if (cb_id >= ss->cb_count) {
- DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count);
+ if (cb_id >= ss->cb_count)
return NULL;
- }
return &ss->cb[cb_id];
}
@@ -125,6 +123,7 @@ void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen,
memcpy(NFA_DATA(nfa), data, attrlen);
memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size);
}
+EXPORT_SYMBOL_GPL(__nfa_fill);
void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
{
@@ -137,6 +136,7 @@ void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
nfa = NFA_NEXT(nfa, len);
}
}
+EXPORT_SYMBOL_GPL(nfattr_parse);
/**
* nfnetlink_check_attributes - check and parse nfnetlink attributes
@@ -150,37 +150,15 @@ static int
nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
struct nlmsghdr *nlh, struct nfattr *cda[])
{
- int min_len;
- u_int16_t attr_count;
+ int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
-
- if (unlikely(cb_id >= subsys->cb_count)) {
- DEBUGP("msgtype %u >= %u, returning\n",
- cb_id, subsys->cb_count);
- return -EINVAL;
- }
-
- min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
- if (unlikely(nlh->nlmsg_len < min_len))
- return -EINVAL;
-
- attr_count = subsys->cb[cb_id].attr_count;
- memset(cda, 0, sizeof(struct nfattr *) * attr_count);
+ u_int16_t attr_count = subsys->cb[cb_id].attr_count;
/* check attribute lengths. */
if (likely(nlh->nlmsg_len > min_len)) {
struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh));
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-
- while (NFA_OK(attr, attrlen)) {
- unsigned flavor = NFA_TYPE(attr);
- if (flavor) {
- if (flavor > attr_count)
- return -EINVAL;
- cda[flavor - 1] = attr;
- }
- attr = NFA_NEXT(attr, attrlen);
- }
+ nfattr_parse(cda, attr_count, attr, attrlen);
}
/* implicit: if nlmsg_len == min_len, we return 0, and an empty
@@ -208,62 +186,46 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
return err;
}
+EXPORT_SYMBOL_GPL(nfnetlink_send);
int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
{
return netlink_unicast(nfnl, skb, pid, flags);
}
+EXPORT_SYMBOL_GPL(nfnetlink_unicast);
/* Process one complete nfnetlink message. */
-static int nfnetlink_rcv_msg(struct sk_buff *skb,
- struct nlmsghdr *nlh, int *errp)
+static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct nfnl_callback *nc;
struct nfnetlink_subsystem *ss;
- int type, err = 0;
-
- DEBUGP("entered; subsys=%u, msgtype=%u\n",
- NFNL_SUBSYS_ID(nlh->nlmsg_type),
- NFNL_MSG_TYPE(nlh->nlmsg_type));
-
- if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
- DEBUGP("missing CAP_NET_ADMIN\n");
- *errp = -EPERM;
- return -1;
- }
+ int type, err;
- /* Only requests are handled by kernel now. */
- if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
- DEBUGP("received non-request message\n");
- return 0;
- }
+ if (security_netlink_recv(skb, CAP_NET_ADMIN))
+ return -EPERM;
/* All the messages must at least contain nfgenmsg */
- if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) {
- DEBUGP("received message was too short\n");
+ if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
return 0;
- }
type = nlh->nlmsg_type;
ss = nfnetlink_get_subsys(type);
if (!ss) {
#ifdef CONFIG_KMOD
- /* don't call nfnl_shunlock, since it would reenter
+ /* don't call nfnl_unlock, since it would reenter
* with further packet processing */
- up(&nfnl_sem);
+ __nfnl_unlock();
request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
- nfnl_shlock();
+ nfnl_lock();
ss = nfnetlink_get_subsys(type);
if (!ss)
#endif
- goto err_inval;
+ return -EINVAL;
}
nc = nfnetlink_find_client(type, ss);
- if (!nc) {
- DEBUGP("unable to find client for type %d\n", type);
- goto err_inval;
- }
+ if (!nc)
+ return -EINVAL;
{
u_int16_t attr_count =
@@ -274,73 +236,21 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
err = nfnetlink_check_attributes(ss, nlh, cda);
if (err < 0)
- goto err_inval;
-
- DEBUGP("calling handler\n");
- err = nc->call(nfnl, skb, nlh, cda, errp);
- *errp = err;
- return err;
- }
-
-err_inval:
- DEBUGP("returning -EINVAL\n");
- *errp = -EINVAL;
- return -1;
-}
-
-/* Process one packet of messages. */
-static inline int nfnetlink_rcv_skb(struct sk_buff *skb)
-{
- int err;
- struct nlmsghdr *nlh;
-
- while (skb->len >= NLMSG_SPACE(0)) {
- u32 rlen;
-
- nlh = (struct nlmsghdr *)skb->data;
- if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
- || skb->len < nlh->nlmsg_len)
- return 0;
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- if (nfnetlink_rcv_msg(skb, nlh, &err)) {
- if (!err)
- return -1;
- netlink_ack(skb, nlh, err);
- } else
- if (nlh->nlmsg_flags & NLM_F_ACK)
- netlink_ack(skb, nlh, 0);
- skb_pull(skb, rlen);
+ return err;
+ return nc->call(nfnl, skb, nlh, cda);
}
-
- return 0;
}
static void nfnetlink_rcv(struct sock *sk, int len)
{
- do {
- struct sk_buff *skb;
+ unsigned int qlen = 0;
- if (nfnl_shlock_nowait())
+ do {
+ if (nfnl_trylock())
return;
-
- while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- if (nfnetlink_rcv_skb(skb)) {
- if (skb->len)
- skb_queue_head(&sk->sk_receive_queue,
- skb);
- else
- kfree_skb(skb);
- break;
- }
- kfree_skb(skb);
- }
-
- /* don't call nfnl_shunlock, since it would reenter
- * with further packet processing */
- up(&nfnl_sem);
- } while(nfnl && nfnl->sk_receive_queue.qlen);
+ netlink_run_queue(sk, &qlen, nfnetlink_rcv_msg);
+ __nfnl_unlock();
+ } while (qlen);
}
static void __exit nfnetlink_exit(void)
@@ -355,7 +265,7 @@ static int __init nfnetlink_init(void)
printk("Netfilter messages via NETLINK v%s.\n", nfversion);
nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
- nfnetlink_rcv, THIS_MODULE);
+ nfnetlink_rcv, NULL, THIS_MODULE);
if (!nfnl) {
printk(KERN_ERR "cannot initialize nfnetlink!\n");
return -1;
@@ -366,10 +276,3 @@ static int __init nfnetlink_init(void)
module_init(nfnetlink_init);
module_exit(nfnetlink_exit);
-
-EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
-EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
-EXPORT_SYMBOL_GPL(nfnetlink_send);
-EXPORT_SYMBOL_GPL(nfnetlink_unicast);
-EXPORT_SYMBOL_GPL(nfattr_parse);
-EXPORT_SYMBOL_GPL(__nfa_fill);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 5cb30ebba0f4..e32e30e7a17c 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -10,11 +10,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 2006-01-26 Harald Welte <laforge@netfilter.org>
- * - Add optional local and global sequence number to detect lost
- * events from userspace
- *
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -163,10 +158,7 @@ instance_create(u_int16_t group_num, int pid)
/* needs to be two, since we _put() after creation */
atomic_set(&inst->use, 2);
- init_timer(&inst->timer);
- inst->timer.function = nfulnl_timer;
- inst->timer.data = (unsigned long)inst;
- /* don't start timer yet. (re)start it with every packet */
+ setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
inst->peer_pid = pid;
inst->group_num = group_num;
@@ -200,20 +192,14 @@ out_unlock:
static int __nfulnl_send(struct nfulnl_instance *inst);
static void
-_instance_destroy2(struct nfulnl_instance *inst, int lock)
+__instance_destroy(struct nfulnl_instance *inst)
{
/* first pull it out of the global list */
- if (lock)
- write_lock_bh(&instances_lock);
-
UDEBUG("removing instance %p (queuenum=%u) from hash\n",
inst, inst->group_num);
hlist_del(&inst->hlist);
- if (lock)
- write_unlock_bh(&instances_lock);
-
/* then flush all pending packets from skb */
spin_lock_bh(&inst->lock);
@@ -235,15 +221,11 @@ _instance_destroy2(struct nfulnl_instance *inst, int lock)
}
static inline void
-__instance_destroy(struct nfulnl_instance *inst)
-{
- _instance_destroy2(inst, 0);
-}
-
-static inline void
instance_destroy(struct nfulnl_instance *inst)
{
- _instance_destroy2(inst, 1);
+ write_lock_bh(&instances_lock);
+ __instance_destroy(inst);
+ write_unlock_bh(&instances_lock);
}
static int
@@ -365,9 +347,6 @@ __nfulnl_send(struct nfulnl_instance *inst)
{
int status;
- if (!inst->skb)
- return 0;
-
if (inst->qlen > 1)
inst->lastnlh->nlmsg_type = NLMSG_DONE;
@@ -391,7 +370,8 @@ static void nfulnl_timer(unsigned long data)
UDEBUG("timer function called, flushing buffer\n");
spin_lock_bh(&inst->lock);
- __nfulnl_send(inst);
+ if (inst->skb)
+ __nfulnl_send(inst);
spin_unlock_bh(&inst->lock);
instance_put(inst);
}
@@ -409,15 +389,14 @@ __build_packet_message(struct nfulnl_instance *inst,
const struct nf_loginfo *li,
const char *prefix, unsigned int plen)
{
- unsigned char *old_tail;
struct nfulnl_msg_packet_hdr pmsg;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
__be32 tmp_uint;
+ sk_buff_data_t old_tail = inst->skb->tail;
UDEBUG("entered\n");
- old_tail = inst->skb->tail;
nlh = NLMSG_PUT(inst->skb, 0, 0,
NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
sizeof(struct nfgenmsg));
@@ -509,11 +488,11 @@ __build_packet_message(struct nfulnl_instance *inst,
NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
}
- if (skb->tstamp.off_sec) {
+ if (skb->tstamp.tv64) {
struct nfulnl_msg_packet_timestamp ts;
-
- ts.sec = cpu_to_be64(skb->tstamp.off_sec);
- ts.usec = cpu_to_be64(skb->tstamp.off_usec);
+ struct timeval tv = ktime_to_timeval(skb->tstamp);
+ ts.sec = cpu_to_be64(tv.tv_sec);
+ ts.usec = cpu_to_be64(tv.tv_usec);
NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
}
@@ -596,7 +575,6 @@ nfulnl_log_packet(unsigned int pf,
struct nfulnl_instance *inst;
const struct nf_loginfo *li;
unsigned int qthreshold;
- unsigned int nlbufsiz;
unsigned int plen;
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
@@ -606,12 +584,7 @@ nfulnl_log_packet(unsigned int pf,
inst = instance_lookup_get(li->u.ulog.group);
if (!inst)
- inst = instance_lookup_get(0);
- if (!inst) {
- PRINTR("nfnetlink_log: trying to log packet, "
- "but no instance for group %u\n", li->u.ulog.group);
return;
- }
plen = 0;
if (prefix)
@@ -667,24 +640,11 @@ nfulnl_log_packet(unsigned int pf,
break;
default:
- spin_unlock_bh(&inst->lock);
- instance_put(inst);
- return;
+ goto unlock_and_release;
}
- if (size > inst->nlbufsiz)
- nlbufsiz = size;
- else
- nlbufsiz = inst->nlbufsiz;
-
- if (!inst->skb) {
- if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
- UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
- inst->nlbufsiz, size);
- goto alloc_failure;
- }
- } else if (inst->qlen >= qthreshold ||
- size > skb_tailroom(inst->skb)) {
+ if (inst->qlen >= qthreshold ||
+ (inst->skb && size > skb_tailroom(inst->skb))) {
/* either the queue len is too high or we don't have
* enough room in the skb left. flush to userspace. */
UDEBUG("flushing old skb\n");
@@ -693,12 +653,12 @@ nfulnl_log_packet(unsigned int pf,
if (del_timer(&inst->timer))
instance_put(inst);
__nfulnl_send(inst);
+ }
- if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
- UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
- inst->nlbufsiz, size);
+ if (!inst->skb) {
+ inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size);
+ if (!inst->skb)
goto alloc_failure;
- }
}
UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
@@ -760,7 +720,7 @@ static struct notifier_block nfulnl_rtnl_notifier = {
static int
nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfqa[])
{
return -ENOTSUPP;
}
@@ -798,7 +758,7 @@ static const int nfula_cfg_min[NFULA_CFG_MAX] = {
static int
nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfula[])
{
struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
u_int16_t group_num = ntohs(nfmsg->res_id);
@@ -830,13 +790,13 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
NETLINK_CB(skb).pid);
if (!inst) {
ret = -EINVAL;
- goto out_put;
+ goto out;
}
break;
case NFULNL_CFG_CMD_UNBIND:
if (!inst) {
ret = -ENODEV;
- goto out_put;
+ goto out;
}
if (inst->peer_pid != NETLINK_CB(skb).pid) {
@@ -845,7 +805,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
}
instance_destroy(inst);
- break;
+ goto out;
case NFULNL_CFG_CMD_PF_BIND:
UDEBUG("registering log handler for pf=%u\n", pf);
ret = nf_log_register(pf, &nfulnl_logger);
@@ -869,7 +829,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
"group=%u pid=%u =>ENOENT\n",
group_num, NETLINK_CB(skb).pid);
ret = -ENOENT;
- goto out_put;
+ goto out;
}
if (inst->peer_pid != NETLINK_CB(skb).pid) {
@@ -939,10 +899,8 @@ struct iter_state {
unsigned int bucket;
};
-static struct hlist_node *get_first(struct seq_file *seq)
+static struct hlist_node *get_first(struct iter_state *st)
{
- struct iter_state *st = seq->private;
-
if (!st)
return NULL;
@@ -953,10 +911,8 @@ static struct hlist_node *get_first(struct seq_file *seq)
return NULL;
}
-static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
+static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
{
- struct iter_state *st = seq->private;
-
h = h->next;
while (!h) {
if (++st->bucket >= INSTANCE_BUCKETS)
@@ -967,13 +923,13 @@ static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
return h;
}
-static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
+static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
{
struct hlist_node *head;
- head = get_first(seq);
+ head = get_first(st);
if (head)
- while (pos && (head = get_next(seq, head)))
+ while (pos && (head = get_next(st, head)))
pos--;
return pos ? NULL : head;
}
@@ -981,13 +937,13 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
read_lock_bh(&instances_lock);
- return get_idx(seq, *pos);
+ return get_idx(seq->private, *pos);
}
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
- return get_next(s, v);
+ return get_next(s->private, v);
}
static void seq_stop(struct seq_file *s, void *v)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d9ce4a71d0f3..7a97bec67729 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -338,7 +338,7 @@ static struct sk_buff *
nfqnl_build_packet_message(struct nfqnl_instance *queue,
struct nfqnl_queue_entry *entry, int *errp)
{
- unsigned char *old_tail;
+ sk_buff_data_t old_tail;
size_t size;
size_t data_len = 0;
struct sk_buff *skb;
@@ -404,7 +404,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (!skb)
goto nlmsg_failure;
- old_tail= skb->tail;
+ old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0,
NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
sizeof(struct nfgenmsg));
@@ -495,11 +495,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
}
- if (entskb->tstamp.off_sec) {
+ if (entskb->tstamp.tv64) {
struct nfqnl_msg_packet_timestamp ts;
-
- ts.sec = cpu_to_be64(entskb->tstamp.off_sec);
- ts.usec = cpu_to_be64(entskb->tstamp.off_usec);
+ struct timeval tv = ktime_to_timeval(entskb->tstamp);
+ ts.sec = cpu_to_be64(tv.tv_sec);
+ ts.usec = cpu_to_be64(tv.tv_usec);
NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
}
@@ -648,7 +648,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
}
if (!skb_make_writable(&e->skb, data_len))
return -ENOMEM;
- memcpy(e->skb->data, data, data_len);
+ skb_copy_to_linear_data(e->skb, data, data_len);
e->skb->ip_summed = CHECKSUM_NONE;
return 0;
}
@@ -783,7 +783,7 @@ static const int nfqa_verdict_min[NFQA_MAX] = {
static int
nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfqa[])
{
struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
@@ -848,7 +848,7 @@ err_out_put:
static int
nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfqa[])
{
return -ENOTSUPP;
}
@@ -865,7 +865,7 @@ static struct nf_queue_handler nfqh = {
static int
nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfqa[])
{
struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ec607a421a5a..0eb2504b89b5 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -56,8 +56,8 @@ enum {
};
static const char *xt_prefix[NPROTO] = {
- [AF_INET] = "ip",
- [AF_INET6] = "ip6",
+ [AF_INET] = "ip",
+ [AF_INET6] = "ip6",
[NF_ARP] = "arp",
};
@@ -651,12 +651,6 @@ void *xt_unregister_table(struct xt_table *table)
EXPORT_SYMBOL_GPL(xt_unregister_table);
#ifdef CONFIG_PROC_FS
-static char *xt_proto_prefix[NPROTO] = {
- [AF_INET] = "ip",
- [AF_INET6] = "ip6",
- [NF_ARP] = "arp",
-};
-
static struct list_head *xt_get_idx(struct list_head *list, struct seq_file *seq, loff_t pos)
{
struct list_head *head = list->next;
@@ -798,7 +792,7 @@ int xt_proto_init(int af)
#ifdef CONFIG_PROC_FS
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
if (!proc)
@@ -806,14 +800,14 @@ int xt_proto_init(int af)
proc->data = (void *) ((unsigned long) af | (TABLE << 16));
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
if (!proc)
goto out_remove_tables;
proc->data = (void *) ((unsigned long) af | (MATCH << 16));
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
if (!proc)
@@ -825,12 +819,12 @@ int xt_proto_init(int af)
#ifdef CONFIG_PROC_FS
out_remove_matches:
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
proc_net_remove(buf);
out_remove_tables:
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc_net_remove(buf);
out:
@@ -844,15 +838,15 @@ void xt_proto_fini(int af)
#ifdef CONFIG_PROC_FS
char buf[XT_FUNCTION_MAXNAMELEN];
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc_net_remove(buf);
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
proc_net_remove(buf);
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
proc_net_remove(buf);
#endif /*CONFIG_PROC_FS*/
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 795c058b16a5..b03ce009d0bf 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -30,10 +30,7 @@ MODULE_ALIAS("ipt_CONNMARK");
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CONNMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack_ecache.h>
-#endif
static unsigned int
target(struct sk_buff **pskb,
@@ -44,40 +41,33 @@ target(struct sk_buff **pskb,
const void *targinfo)
{
const struct xt_connmark_target_info *markinfo = targinfo;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
u_int32_t diff;
u_int32_t mark;
u_int32_t newmark;
- u_int32_t ctinfo;
- u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
- if (ctmark) {
+ ct = nf_ct_get(*pskb, &ctinfo);
+ if (ct) {
switch(markinfo->mode) {
case XT_CONNMARK_SET:
- newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
- if (newmark != *ctmark) {
- *ctmark = newmark;
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
- ip_conntrack_event_cache(IPCT_MARK, *pskb);
-#else
+ newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
+ if (newmark != ct->mark) {
+ ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, *pskb);
-#endif
}
break;
case XT_CONNMARK_SAVE:
- newmark = (*ctmark & ~markinfo->mask) |
+ newmark = (ct->mark & ~markinfo->mask) |
((*pskb)->mark & markinfo->mask);
- if (*ctmark != newmark) {
- *ctmark = newmark;
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
- ip_conntrack_event_cache(IPCT_MARK, *pskb);
-#else
+ if (ct->mark != newmark) {
+ ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, *pskb);
-#endif
}
break;
case XT_CONNMARK_RESTORE:
mark = (*pskb)->mark;
- diff = (*ctmark ^ mark) & markinfo->mask;
+ diff = (ct->mark ^ mark) & markinfo->mask;
(*pskb)->mark = mark ^ diff;
break;
}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 1ab0db641f96..81c0c58bab47 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -19,7 +19,7 @@
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CONNSECMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
#define PFX "CONNSECMARK: "
@@ -36,12 +36,12 @@ MODULE_ALIAS("ip6t_CONNSECMARK");
static void secmark_save(struct sk_buff *skb)
{
if (skb->secmark) {
- u32 *connsecmark;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- connsecmark = nf_ct_get_secmark(skb, &ctinfo);
- if (connsecmark && !*connsecmark)
- *connsecmark = skb->secmark;
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct && !ct->secmark)
+ ct->secmark = skb->secmark;
}
}
@@ -52,12 +52,12 @@ static void secmark_save(struct sk_buff *skb)
static void secmark_restore(struct sk_buff *skb)
{
if (!skb->secmark) {
- u32 *connsecmark;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- connsecmark = nf_ct_get_secmark(skb, &ctinfo);
- if (connsecmark && *connsecmark)
- skb->secmark = *connsecmark;
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct && ct->secmark)
+ skb->secmark = ct->secmark;
}
}
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index a7cc75aeb38d..9f2f2201f6ae 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -8,8 +8,6 @@
* published by the Free Software Foundation.
*
* See RFC2474 for a description of the DSCP field within the IP Header.
- *
- * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
*/
#include <linux/module.h>
@@ -35,13 +33,13 @@ static unsigned int target(struct sk_buff **pskb,
const void *targinfo)
{
const struct xt_DSCP_info *dinfo = targinfo;
- u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return NF_DROP;
- ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK),
+ ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);
}
@@ -56,13 +54,13 @@ static unsigned int target6(struct sk_buff **pskb,
const void *targinfo)
{
const struct xt_DSCP_info *dinfo = targinfo;
- u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
return NF_DROP;
- ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK),
+ ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);
}
return XT_CONTINUE;
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index b874a2008b2b..5085fb3d1e2d 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -5,7 +5,7 @@
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_NOTRACK");
@@ -26,7 +26,7 @@ target(struct sk_buff **pskb,
If there is a real ct entry correspondig to this packet,
it'll hang aroun till timing out. We don't deal with it
for performance reasons. JK */
- nf_ct_untrack(*pskb);
+ (*pskb)->nfct = &nf_conntrack_untracked.ct_general;
(*pskb)->nfctinfo = IP_CT_NEW;
nf_conntrack_get((*pskb)->nfct);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index db7e38c08de2..15fe8f649510 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -54,7 +54,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
return -1;
tcplen = (*pskb)->len - tcphoff;
- tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+ tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
/* Since it passed flags test in tcp match, we know it is is
not a fragment, and has data >= tcp header length. SYN
@@ -113,7 +113,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
return -1;
kfree_skb(*pskb);
*pskb = newskb;
- tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+ tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
}
skb_put((*pskb), TCPOLEN_MSS);
@@ -145,7 +145,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
__be16 newlen;
int ret;
@@ -154,7 +154,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
if (ret < 0)
return NF_DROP;
if (ret > 0) {
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
newlen = htons(ntohs(iph->tot_len) + ret);
nf_csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
@@ -171,7 +171,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h;
+ struct ipv6hdr *ipv6h = ipv6_hdr(*pskb);
u8 nexthdr;
int tcphoff;
int ret;
@@ -187,7 +187,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
if (ret < 0)
return NF_DROP;
if (ret > 0) {
- ipv6h = (*pskb)->nh.ipv6h;
+ ipv6h = ipv6_hdr(*pskb);
ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
}
return XT_CONTINUE;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5e32dfa2668b..804afe55e141 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -1,20 +1,11 @@
/* Kernel module to match connection tracking byte counter.
* GPL (C) 2002 Martin Devera (devik@cdi.cz).
- *
- * 2004-07-20 Harald Welte <laforge@netfilter.org>
- * - reimplemented to use per-connection accounting counters
- * - add functionality to match number of packets
- * - add functionality to match average packet size
- * - add support to match directions seperately
- * 2005-10-16 Harald Welte <laforge@netfilter.org>
- * - Port to x_tables
- *
*/
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_connbytes.h>
+#include <net/netfilter/nf_conntrack.h>
#include <asm/div64.h>
#include <asm/bitops.h>
@@ -24,22 +15,6 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
MODULE_ALIAS("ipt_connbytes");
-/* 64bit divisor, dividend and result. dynamic precision */
-static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
- u_int32_t d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- do_div(dividend, d);
- return dividend;
-}
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -51,13 +26,17 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_connbytes_info *sinfo = matchinfo;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
u_int64_t what = 0; /* initialize to make gcc happy */
u_int64_t bytes = 0;
u_int64_t pkts = 0;
const struct ip_conntrack_counter *counters;
- if (!(counters = nf_ct_get_counters(skb)))
- return 0; /* no match */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return 0;
+ counters = ct->counters;
switch (sinfo->what) {
case XT_CONNBYTES_PKTS:
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 36c2defff238..e1803256c792 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -21,16 +21,15 @@
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connmark.h>
MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
MODULE_DESCRIPTION("IP tables connmark match module");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_connmark");
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_connmark.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -42,12 +41,14 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_connmark_info *info = matchinfo;
- u_int32_t ctinfo;
- const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
- if (!ctmark)
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
return 0;
- return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
+ return (((ct->mark) & info->mask) == info->mark) ^ info->invert;
}
static int
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 2885c378288e..f4ea8fe07a53 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -10,121 +10,15 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#else
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_conntrack.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("iptables connection tracking match module");
MODULE_ALIAS("ipt_conntrack");
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct xt_match *match,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- const struct xt_conntrack_info *sinfo = matchinfo;
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int statebit;
-
- ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-
-#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg))
-
- if (ct == &ip_conntrack_untracked)
- statebit = XT_CONNTRACK_STATE_UNTRACKED;
- else if (ct)
- statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
- else
- statebit = XT_CONNTRACK_STATE_INVALID;
-
- if (sinfo->flags & XT_CONNTRACK_STATE) {
- if (ct) {
- if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
- statebit |= XT_CONNTRACK_STATE_SNAT;
- if (test_bit(IPS_DST_NAT_BIT, &ct->status))
- statebit |= XT_CONNTRACK_STATE_DNAT;
- }
- if (FWINV((statebit & sinfo->statemask) == 0,
- XT_CONNTRACK_STATE))
- return 0;
- }
-
- if (ct == NULL) {
- if (sinfo->flags & ~XT_CONNTRACK_STATE)
- return 0;
- return 1;
- }
-
- if (sinfo->flags & XT_CONNTRACK_PROTO &&
- FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
- sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
- XT_CONNTRACK_PROTO))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
- FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip &
- sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
- sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
- XT_CONNTRACK_ORIGSRC))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
- FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip &
- sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
- sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
- XT_CONNTRACK_ORIGDST))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
- FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip &
- sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
- sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
- XT_CONNTRACK_REPLSRC))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_REPLDST &&
- FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip &
- sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
- sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
- XT_CONNTRACK_REPLDST))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_STATUS &&
- FWINV((ct->status & sinfo->statusmask) == 0,
- XT_CONNTRACK_STATUS))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
- unsigned long expires = timer_pending(&ct->timeout) ?
- (ct->timeout.expires - jiffies)/HZ : 0;
-
- if (FWINV(!(expires >= sinfo->expires_min &&
- expires <= sinfo->expires_max),
- XT_CONNTRACK_EXPIRES))
- return 0;
- }
- return 1;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -220,8 +114,6 @@ match(const struct sk_buff *skb,
return 1;
}
-#endif /* CONFIG_NF_IP_CONNTRACK */
-
static int
checkentry(const char *tablename,
const void *ip,
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 26c7f4ad102a..56b247ecc283 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -1,7 +1,5 @@
/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
*
- * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
- *
* (C) 2002 by Harald Welte <laforge@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
@@ -34,7 +32,7 @@ static int match(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_dscp_info *info = matchinfo;
- u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
return (dscp == info->dscp) ^ !!info->invert;
}
@@ -49,7 +47,7 @@ static int match6(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_dscp_info *info = matchinfo;
- u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
return (dscp == info->dscp) ^ !!info->invert;
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9f37d593ca38..d3043fa32ebc 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -216,10 +216,8 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family)
hinfo->pde->proc_fops = &dl_file_ops;
hinfo->pde->data = hinfo;
- init_timer(&hinfo->timer);
+ setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
- hinfo->timer.data = (unsigned long )hinfo;
- hinfo->timer.function = htable_gc;
add_timer(&hinfo->timer);
spin_lock_bh(&hashlimit_lock);
@@ -380,22 +378,22 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst,
switch (hinfo->family) {
case AF_INET:
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
- dst->addr.ip.dst = skb->nh.iph->daddr;
+ dst->addr.ip.dst = ip_hdr(skb)->daddr;
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
- dst->addr.ip.src = skb->nh.iph->saddr;
+ dst->addr.ip.src = ip_hdr(skb)->saddr;
if (!(hinfo->cfg.mode &
(XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
return 0;
- nexthdr = skb->nh.iph->protocol;
+ nexthdr = ip_hdr(skb)->protocol;
break;
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
case AF_INET6:
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
- memcpy(&dst->addr.ip6.dst, &skb->nh.ipv6h->daddr,
+ memcpy(&dst->addr.ip6.dst, &ipv6_hdr(skb)->daddr,
sizeof(dst->addr.ip6.dst));
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
- memcpy(&dst->addr.ip6.src, &skb->nh.ipv6h->saddr,
+ memcpy(&dst->addr.ip6.src, &ipv6_hdr(skb)->saddr,
sizeof(dst->addr.ip6.src));
if (!(hinfo->cfg.mode &
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 407d1d5da8a1..c139b2f43a10 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -5,26 +5,16 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 19 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- * - Port to newnat infrastructure
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#else
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
-#endif
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_helper.h>
-#include <net/netfilter/nf_conntrack_compat.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
@@ -38,55 +28,6 @@ MODULE_ALIAS("ip6t_helper");
#define DEBUGP(format, args...)
#endif
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct xt_match *match,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- const struct xt_helper_info *info = matchinfo;
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- int ret = info->invert;
-
- ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
- if (!ct) {
- DEBUGP("xt_helper: Eek! invalid conntrack?\n");
- return ret;
- }
-
- if (!ct->master) {
- DEBUGP("xt_helper: conntrack %p has no master\n", ct);
- return ret;
- }
-
- read_lock_bh(&ip_conntrack_lock);
- if (!ct->master->helper) {
- DEBUGP("xt_helper: master ct %p has no helper\n",
- exp->expectant);
- goto out_unlock;
- }
-
- DEBUGP("master's name = %s , info->name = %s\n",
- ct->master->helper->name, info->name);
-
- if (info->name[0] == '\0')
- ret ^= 1;
- else
- ret ^= !strncmp(ct->master->helper->name, info->name,
- strlen(ct->master->helper->name));
-out_unlock:
- read_unlock_bh(&ip_conntrack_lock);
- return ret;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -134,7 +75,6 @@ out_unlock:
read_unlock_bh(&nf_conntrack_lock);
return ret;
}
-#endif
static int check(const char *tablename,
const void *inf,
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 32fb998d9bac..77288c5ada78 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -31,7 +31,7 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_length_info *info = matchinfo;
- u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
+ u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
}
@@ -47,7 +47,8 @@ match6(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_length_info *info = matchinfo;
- u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
+ const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr));
return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
}
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 6fd8347c0058..571a72ab89ad 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -1,10 +1,3 @@
-/* Kernel module to control the rate
- *
- * 2 September 1999: Changed from the target RATE to the match
- * `limit', removed logging. Did I mention that
- * Alexey is a fucking genius?
- * Rusty Russell (rusty@rustcorp.com.au). */
-
/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
* (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
*
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index d430d90d7b26..1d3a1d98b885 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -37,8 +37,8 @@ match(const struct sk_buff *skb,
const struct xt_mac_info *info = matchinfo;
/* Is mac pointer valid? */
- return (skb->mac.raw >= skb->head
- && (skb->mac.raw + ETH_HLEN) <= skb->data
+ return (skb_mac_header(skb) >= skb->head &&
+ (skb_mac_header(skb) + ETH_HLEN) <= skb->data
/* If so, compare... */
&& ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
^ info->invert));
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 16e7b0804287..e1409fc5c288 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -34,7 +34,7 @@ static int match(const struct sk_buff *skb,
const struct xt_pkttype_info *info = matchinfo;
if (skb->pkt_type == PACKET_LOOPBACK)
- type = (MULTICAST(skb->nh.iph->daddr)
+ type = (MULTICAST(ip_hdr(skb)->daddr)
? PACKET_MULTICAST
: PACKET_BROADCAST);
else
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 97ffc2fbc19d..c2017f8af9c4 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -1,7 +1,5 @@
/* IP tables module for matching the routing realm
*
- * $Id: ipt_realm.c,v 1.3 2004/03/05 13:25:40 laforge Exp $
- *
* (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index df37b912163a..149294f7df71 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -10,7 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_state.h>
@@ -36,7 +36,7 @@ match(const struct sk_buff *skb,
if (nf_ct_is_untracked(skb))
statebit = XT_STATE_UNTRACKED;
- else if (!nf_ct_get_ctinfo(skb, &ctinfo))
+ else if (!nf_ct_get(skb, &ctinfo))
statebit = XT_STATE_INVALID;
else
statebit = XT_STATE_BIT(ctinfo);
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index e03a3282c551..f2535e7f2869 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -263,9 +263,6 @@ int netlbl_socket_setattr(const struct socket *sock,
int ret_val = -ENOENT;
struct netlbl_dom_map *dom_entry;
- if ((secattr->flags & NETLBL_SECATTR_DOMAIN) == 0)
- return -ENOENT;
-
rcu_read_lock();
dom_entry = netlbl_domhsh_getentry(secattr->domain);
if (dom_entry == NULL)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e73d8f546c6b..1f15821c8da4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -45,7 +45,6 @@
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
@@ -56,6 +55,7 @@
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/selinux.h>
+#include <linux/mutex.h>
#include <net/sock.h>
#include <net/scm.h>
@@ -76,7 +76,8 @@ struct netlink_sock {
unsigned long state;
wait_queue_head_t wait;
struct netlink_callback *cb;
- spinlock_t cb_lock;
+ struct mutex *cb_mutex;
+ struct mutex cb_def_mutex;
void (*data_ready)(struct sock *sk, int bytes);
struct module *module;
};
@@ -108,6 +109,7 @@ struct netlink_table {
unsigned long *listeners;
unsigned int nl_nonroot;
unsigned int groups;
+ struct mutex *cb_mutex;
struct module *module;
int registered;
};
@@ -118,6 +120,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);
+static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb);
static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);
@@ -136,6 +139,14 @@ static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
static void netlink_sock_destruct(struct sock *sk)
{
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ if (nlk->cb) {
+ if (nlk->cb->done)
+ nlk->cb->done(nlk->cb);
+ netlink_destroy_callback(nlk->cb);
+ }
+
skb_queue_purge(&sk->sk_receive_queue);
if (!sock_flag(sk, SOCK_DEAD)) {
@@ -144,7 +155,6 @@ static void netlink_sock_destruct(struct sock *sk)
}
BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
- BUG_TRAP(!nlk_sk(sk)->cb);
BUG_TRAP(!nlk_sk(sk)->groups);
}
@@ -370,7 +380,8 @@ static struct proto netlink_proto = {
.obj_size = sizeof(struct netlink_sock),
};
-static int __netlink_create(struct socket *sock, int protocol)
+static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
+ int protocol)
{
struct sock *sk;
struct netlink_sock *nlk;
@@ -384,7 +395,12 @@ static int __netlink_create(struct socket *sock, int protocol)
sock_init_data(sock, sk);
nlk = nlk_sk(sk);
- spin_lock_init(&nlk->cb_lock);
+ if (cb_mutex)
+ nlk->cb_mutex = cb_mutex;
+ else {
+ nlk->cb_mutex = &nlk->cb_def_mutex;
+ mutex_init(nlk->cb_mutex);
+ }
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@@ -395,8 +411,8 @@ static int __netlink_create(struct socket *sock, int protocol)
static int netlink_create(struct socket *sock, int protocol)
{
struct module *module = NULL;
+ struct mutex *cb_mutex;
struct netlink_sock *nlk;
- unsigned int groups;
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -418,10 +434,10 @@ static int netlink_create(struct socket *sock, int protocol)
if (nl_table[protocol].registered &&
try_module_get(nl_table[protocol].module))
module = nl_table[protocol].module;
- groups = nl_table[protocol].groups;
+ cb_mutex = nl_table[protocol].cb_mutex;
netlink_unlock_table();
- if ((err = __netlink_create(sock, protocol)) < 0)
+ if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
goto out_module;
nlk = nlk_sk(sock->sk);
@@ -443,21 +459,14 @@ static int netlink_release(struct socket *sock)
return 0;
netlink_remove(sk);
+ sock_orphan(sk);
nlk = nlk_sk(sk);
- spin_lock(&nlk->cb_lock);
- if (nlk->cb) {
- if (nlk->cb->done)
- nlk->cb->done(nlk->cb);
- netlink_destroy_callback(nlk->cb);
- nlk->cb = NULL;
- }
- spin_unlock(&nlk->cb_lock);
-
- /* OK. Socket is unlinked, and, therefore,
- no new packets will arrive */
+ /*
+ * OK. Socket is unlinked, any packets that arrive now
+ * will be purged.
+ */
- sock_orphan(sk);
sock->sk = NULL;
wake_up_interruptible_all(&nlk->wait);
@@ -1215,7 +1224,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
copied = len;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
if (msg->msg_name) {
@@ -1235,13 +1244,14 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
siocb->scm = &scm;
}
siocb->scm->creds = *NETLINK_CREDS(skb);
+ if (flags & MSG_TRUNC)
+ copied = skb->len;
skb_free_datagram(sk, skb);
if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
netlink_dump(sk);
scm_recv(sock, msg, siocb->scm, flags);
-
out:
netlink_rcv_wake(sk);
return err ? : copied;
@@ -1265,7 +1275,7 @@ static void netlink_data_ready(struct sock *sk, int len)
struct sock *
netlink_kernel_create(int unit, unsigned int groups,
void (*input)(struct sock *sk, int len),
- struct module *module)
+ struct mutex *cb_mutex, struct module *module)
{
struct socket *sock;
struct sock *sk;
@@ -1280,7 +1290,7 @@ netlink_kernel_create(int unit, unsigned int groups,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- if (__netlink_create(sock, unit) < 0)
+ if (__netlink_create(sock, cb_mutex, unit) < 0)
goto out_sock_release;
if (groups < 32)
@@ -1304,6 +1314,7 @@ netlink_kernel_create(int unit, unsigned int groups,
netlink_table_grab();
nl_table[unit].groups = groups;
nl_table[unit].listeners = listeners;
+ nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
nl_table[unit].registered = 1;
netlink_table_ungrab();
@@ -1346,7 +1357,7 @@ static int netlink_dump(struct sock *sk)
if (!skb)
goto errout;
- spin_lock(&nlk->cb_lock);
+ mutex_lock(nlk->cb_mutex);
cb = nlk->cb;
if (cb == NULL) {
@@ -1357,7 +1368,7 @@ static int netlink_dump(struct sock *sk)
len = cb->dump(skb, cb);
if (len > 0) {
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, len);
return 0;
@@ -1375,13 +1386,13 @@ static int netlink_dump(struct sock *sk)
if (cb->done)
cb->done(cb);
nlk->cb = NULL;
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
netlink_destroy_callback(cb);
return 0;
errout_skb:
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
kfree_skb(skb);
errout:
return err;
@@ -1413,19 +1424,24 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
}
nlk = nlk_sk(sk);
/* A dump is in progress... */
- spin_lock(&nlk->cb_lock);
+ mutex_lock(nlk->cb_mutex);
if (nlk->cb) {
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
netlink_destroy_callback(cb);
sock_put(sk);
return -EBUSY;
}
nlk->cb = cb;
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
netlink_dump(sk);
sock_put(sk);
- return 0;
+
+ /* We successfully started a dump, by returning -EINTR we
+ * signal the queue management to interrupt processing of
+ * any netlink messages so userspace gets a chance to read
+ * the results. */
+ return -EINTR;
}
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
@@ -1462,27 +1478,35 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
}
static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
- struct nlmsghdr *, int *))
+ struct nlmsghdr *))
{
struct nlmsghdr *nlh;
int err;
while (skb->len >= nlmsg_total_size(0)) {
- nlh = (struct nlmsghdr *) skb->data;
+ nlh = nlmsg_hdr(skb);
+ err = 0;
if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
return 0;
- if (cb(skb, nlh, &err) < 0) {
- /* Not an error, but we have to interrupt processing
- * here. Note: that in this case we do not pull
- * message from skb, it will be processed later.
- */
- if (err == 0)
- return -1;
+ /* Only requests are handled by the kernel */
+ if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
+ goto skip;
+
+ /* Skip control messages */
+ if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
+ goto skip;
+
+ err = cb(skb, nlh);
+ if (err == -EINTR) {
+ /* Not an error, but we interrupt processing */
+ netlink_queue_skip(nlh, skb);
+ return err;
+ }
+skip:
+ if (nlh->nlmsg_flags & NLM_F_ACK || err)
netlink_ack(skb, nlh, err);
- } else if (nlh->nlmsg_flags & NLM_F_ACK)
- netlink_ack(skb, nlh, 0);
netlink_queue_skip(nlh, skb);
}
@@ -1504,9 +1528,14 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
*
* qlen must be initialized to 0 before the initial entry, afterwards
* the function may be called repeatedly until qlen reaches 0.
+ *
+ * The callback function may return -EINTR to signal that processing
+ * of netlink messages shall be interrupted. In this case the message
+ * currently being processed will NOT be requeued onto the receive
+ * queue.
*/
void netlink_run_queue(struct sock *sk, unsigned int *qlen,
- int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
+ int (*cb)(struct sk_buff *, struct nlmsghdr *))
{
struct sk_buff *skb;
@@ -1537,7 +1566,7 @@ void netlink_run_queue(struct sock *sk, unsigned int *qlen,
* Pulls the given netlink message off the socket buffer so the next
* call to netlink_queue_run() will not reconsider the message.
*/
-void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
+static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
{
int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
@@ -1820,12 +1849,10 @@ core_initcall(netlink_proto_init);
EXPORT_SYMBOL(netlink_ack);
EXPORT_SYMBOL(netlink_run_queue);
-EXPORT_SYMBOL(netlink_queue_skip);
EXPORT_SYMBOL(netlink_broadcast);
EXPORT_SYMBOL(netlink_dump_start);
EXPORT_SYMBOL(netlink_kernel_create);
EXPORT_SYMBOL(netlink_register_notifier);
-EXPORT_SYMBOL(netlink_set_err);
EXPORT_SYMBOL(netlink_set_nonroot);
EXPORT_SYMBOL(netlink_unicast);
EXPORT_SYMBOL(netlink_unregister_notifier);
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index 004139557e09..df5f820a4c32 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -67,6 +67,11 @@ static int validate_nla(struct nlattr *nla, int maxtype,
}
break;
+ case NLA_BINARY:
+ if (pt->len && attrlen > pt->len)
+ return -ERANGE;
+ break;
+
default:
if (pt->len)
minlen = pt->len;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index c2996794eb25..6e31234a4196 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -295,66 +295,46 @@ int genl_unregister_family(struct genl_family *family)
return -ENOENT;
}
-static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
- int *errp)
+static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct genl_ops *ops;
struct genl_family *family;
struct genl_info info;
struct genlmsghdr *hdr = nlmsg_data(nlh);
- int hdrlen, err = -EINVAL;
-
- if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
- goto ignore;
-
- if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
- goto ignore;
+ int hdrlen, err;
family = genl_family_find_byid(nlh->nlmsg_type);
- if (family == NULL) {
- err = -ENOENT;
- goto errout;
- }
+ if (family == NULL)
+ return -ENOENT;
hdrlen = GENL_HDRLEN + family->hdrsize;
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
- goto errout;
+ return -EINVAL;
ops = genl_get_cmd(hdr->cmd, family);
- if (ops == NULL) {
- err = -EOPNOTSUPP;
- goto errout;
- }
+ if (ops == NULL)
+ return -EOPNOTSUPP;
- if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb, CAP_NET_ADMIN)) {
- err = -EPERM;
- goto errout;
- }
+ if ((ops->flags & GENL_ADMIN_PERM) &&
+ security_netlink_recv(skb, CAP_NET_ADMIN))
+ return -EPERM;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
- if (ops->dumpit == NULL) {
- err = -EOPNOTSUPP;
- goto errout;
- }
+ if (ops->dumpit == NULL)
+ return -EOPNOTSUPP;
- *errp = err = netlink_dump_start(genl_sock, skb, nlh,
- ops->dumpit, ops->done);
- if (err == 0)
- skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len),
- skb->len));
- return -1;
+ return netlink_dump_start(genl_sock, skb, nlh,
+ ops->dumpit, ops->done);
}
- if (ops->doit == NULL) {
- err = -EOPNOTSUPP;
- goto errout;
- }
+ if (ops->doit == NULL)
+ return -EOPNOTSUPP;
if (family->attrbuf) {
err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
ops->policy);
if (err < 0)
- goto errout;
+ return err;
}
info.snd_seq = nlh->nlmsg_seq;
@@ -364,15 +344,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = family->attrbuf;
- *errp = err = ops->doit(skb, &info);
- return err;
-
-ignore:
- return 0;
-
-errout:
- *errp = err;
- return -1;
+ return ops->doit(skb, &info);
}
static void genl_rcv(struct sock *sk, int len)
@@ -586,7 +558,7 @@ static int __init genl_init(void)
netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
- genl_rcv, THIS_MODULE);
+ genl_rcv, NULL, THIS_MODULE);
if (genl_sock == NULL)
panic("GENL: Cannot initialize generic netlink\n");
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index bf9837dd95c4..5d4a26c2aa0c 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -625,42 +625,42 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
ax25_address *source = NULL;
ax25_uid_assoc *user;
struct net_device *dev;
+ int err = 0;
lock_sock(sk);
if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
sock->state = SS_CONNECTED;
- release_sock(sk);
- return 0; /* Connect completed during a ERESTARTSYS event */
+ goto out_release; /* Connect completed during a ERESTARTSYS event */
}
if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
sock->state = SS_UNCONNECTED;
- release_sock(sk);
- return -ECONNREFUSED;
+ err = -ECONNREFUSED;
+ goto out_release;
}
if (sk->sk_state == TCP_ESTABLISHED) {
- release_sock(sk);
- return -EISCONN; /* No reconnect on a seqpacket socket */
+ err = -EISCONN; /* No reconnect on a seqpacket socket */
+ goto out_release;
}
sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) {
- release_sock(sk);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_release;
}
if (addr->sax25_family != AF_NETROM) {
- release_sock(sk);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_release;
}
if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */
sock_reset_flag(sk, SOCK_ZAPPED);
if ((dev = nr_dev_first()) == NULL) {
- release_sock(sk);
- return -ENETUNREACH;
+ err = -ENETUNREACH;
+ goto out_release;
}
source = (ax25_address *)dev->dev_addr;
@@ -671,8 +671,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
} else {
if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
dev_put(dev);
- release_sock(sk);
- return -EPERM;
+ err = -EPERM;
+ goto out_release;
}
nr->user_addr = *source;
}
@@ -707,8 +707,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
/* Now the loop */
if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
- release_sock(sk);
- return -EINPROGRESS;
+ err = -EINPROGRESS;
+ goto out_release;
}
/*
@@ -716,46 +716,46 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
* closed.
*/
if (sk->sk_state == TCP_SYN_SENT) {
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
+ DEFINE_WAIT(wait);
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk->sk_sleep, &wait,
+ TASK_INTERRUPTIBLE);
if (sk->sk_state != TCP_SYN_SENT)
break;
- release_sock(sk);
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+ if (err)
+ goto out_release;
}
if (sk->sk_state != TCP_ESTABLISHED) {
sock->state = SS_UNCONNECTED;
- release_sock(sk);
- return sock_error(sk); /* Always set at this point */
+ err = sock_error(sk); /* Always set at this point */
+ goto out_release;
}
sock->state = SS_CONNECTED;
+
+out_release:
release_sock(sk);
- return 0;
+ return err;
}
static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
struct sk_buff *skb;
struct sock *newsk;
+ DEFINE_WAIT(wait);
struct sock *sk;
int err = 0;
@@ -765,42 +765,40 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
lock_sock(sk);
if (sk->sk_type != SOCK_SEQPACKET) {
err = -EOPNOTSUPP;
- goto out;
+ goto out_release;
}
if (sk->sk_state != TCP_LISTEN) {
err = -EINVAL;
- goto out;
+ goto out_release;
}
/*
* The write queue this time is holding sockets ready to use
* hooked into the SABM we saved
*/
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
+ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb)
break;
- current->state = TASK_INTERRUPTIBLE;
- release_sock(sk);
if (flags & O_NONBLOCK) {
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -EWOULDBLOCK;
+ err = -EWOULDBLOCK;
+ break;
}
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+ if (err)
+ goto out_release;
newsk = skb->sk;
newsk->sk_socket = newsock;
@@ -811,8 +809,9 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
sk_acceptq_removed(sk);
newsock->sk = newsk;
-out:
+out_release:
release_sock(sk);
+
return err;
}
@@ -878,7 +877,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
if (frametype == NR_PROTOEXT &&
circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
return nr_rx_ip(skb, dev);
}
@@ -904,7 +903,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
}
if (sk != NULL) {
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
if (frametype == NR_CONNACK && skb->len == 22)
nr_sk(sk)->bpqext = 1;
@@ -1074,6 +1073,7 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out;
skb_reserve(skb, size - len);
+ skb_reset_transport_header(skb);
/*
* Push down the NET/ROM header
@@ -1094,14 +1094,12 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
/*
* Put the data on the end
*/
+ skb_put(skb, len);
- skb->h.raw = skb_put(skb, len);
-
- asmptr = skb->h.raw;
SOCK_DEBUG(sk, "NET/ROM: Appending user data\n");
/* User data follows immediately after the NET/ROM transport header */
- if (memcpy_fromiovec(asmptr, msg->msg_iov, len)) {
+ if (memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len)) {
kfree_skb(skb);
err = -EFAULT;
goto out;
@@ -1149,7 +1147,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
return er;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
copied = skb->len;
if (copied > size) {
@@ -1161,7 +1159,8 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
if (sax != NULL) {
sax->sax25_family = AF_NETROM;
- memcpy(sax->sax25_call.ax25_call, skb->data + 7, AX25_ADDR_LEN);
+ skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
+ AX25_ADDR_LEN);
}
msg->msg_namelen = sizeof(*sax);
@@ -1209,6 +1208,12 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
release_sock(sk);
return ret;
+ case SIOCGSTAMPNS:
+ lock_sock(sk);
+ ret = sock_get_timestampns(sk, argp);
+ release_sock(sk);
+ return ret;
+
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 9a97ed6e6910..c7b5d930e732 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -56,8 +56,8 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
/* Spoof incoming device */
skb->dev = dev;
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
skb->pkt_type = PACKET_HOST;
netif_rx(skb);
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 5560acbaaa95..68176483617f 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -51,10 +51,12 @@ static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL)
return 1;
- skbn->h.raw = skbn->data;
+ skb_reset_transport_header(skbn);
while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) {
- memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+ skb_copy_from_linear_data(skbo,
+ skb_put(skbn, skbo->len),
+ skbo->len);
kfree_skb(skbo);
}
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index e856ae1b360a..f324d5df4186 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -34,8 +34,8 @@ int nr_loopback_queue(struct sk_buff *skb)
struct sk_buff *skbn;
if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) {
- memcpy(skb_put(skbn, skb->len), skb->data, skb->len);
- skbn->h.raw = skbn->data;
+ skb_copy_from_linear_data(skb, skb_put(skbn, skb->len), skb->len);
+ skb_reset_transport_header(skbn);
skb_queue_tail(&loopback_queue, skbn);
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 0cbfb611465b..e3e6c44e1890 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -40,7 +40,7 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) {
/* Save a copy of the Transport Header */
- memcpy(transport, skb->data, NR_TRANSPORT_LEN);
+ skb_copy_from_linear_data(skb, transport, NR_TRANSPORT_LEN);
skb_pull(skb, NR_TRANSPORT_LEN);
frontlen = skb_headroom(skb);
@@ -54,13 +54,13 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE;
/* Copy the user data */
- memcpy(skb_put(skbn, len), skb->data, len);
+ skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
skb_pull(skb, len);
/* Duplicate the Transport Header */
skb_push(skbn, NR_TRANSPORT_LEN);
- memcpy(skbn->data, transport, NR_TRANSPORT_LEN);
-
+ skb_copy_to_linear_data(skbn, transport,
+ NR_TRANSPORT_LEN);
if (skb->len > 0)
skbn->data[4] |= NR_MORE_FLAG;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 8e6bd4e9d82c..2f76e062609d 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -598,7 +598,7 @@ struct net_device *nr_dev_first(void)
struct net_device *dev, *first = NULL;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM)
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
@@ -618,12 +618,13 @@ struct net_device *nr_dev_get(ax25_address *addr)
struct net_device *dev;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) {
dev_hold(dev);
goto out;
}
}
+ dev = NULL;
out:
read_unlock(&dev_base_lock);
return dev;
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 07b694d18870..04e7d0d2fd8f 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -226,13 +226,13 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
dptr = skb_put(skbn, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
- memcpy(dptr, skb->data + 7, AX25_ADDR_LEN);
+ skb_copy_from_linear_data_offset(skb, 7, dptr, AX25_ADDR_LEN);
dptr[6] &= ~AX25_CBIT;
dptr[6] &= ~AX25_EBIT;
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- memcpy(dptr, skb->data + 0, AX25_ADDR_LEN);
+ skb_copy_from_linear_data(skb, dptr, AX25_ADDR_LEN);
dptr[6] &= ~AX25_CBIT;
dptr[6] |= AX25_EBIT;
dptr[6] |= AX25_SSSID_SPARE;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 28d47e8f2873..02e401cd683f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -114,22 +114,22 @@ On receive:
-----------
Incoming, dev->hard_header!=NULL
- mac.raw -> ll header
- data -> data
+ mac_header -> ll header
+ data -> data
Outgoing, dev->hard_header!=NULL
- mac.raw -> ll header
- data -> ll header
+ mac_header -> ll header
+ data -> ll header
Incoming, dev->hard_header==NULL
- mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
- PPP makes it, that is wrong, because introduce assymetry
- between rx and tx paths.
- data -> data
+ mac_header -> UNKNOWN position. It is very likely, that it points to ll
+ header. PPP makes it, that is wrong, because introduce
+ assymetry between rx and tx paths.
+ data -> data
Outgoing, dev->hard_header==NULL
- mac.raw -> data. ll header is still not built!
- data -> data
+ mac_header -> data. ll header is still not built!
+ data -> data
Resume
If dev->hard_header==NULL we are unlikely to restore sensible ll header.
@@ -139,12 +139,12 @@ On transmit:
------------
dev->hard_header != NULL
- mac.raw -> ll header
- data -> ll header
+ mac_header -> ll header
+ data -> ll header
dev->hard_header == NULL (ll header is added by device, we cannot control it)
- mac.raw -> data
- data -> data
+ mac_header -> data
+ data -> data
We should set nh.raw on output to correct posistion,
packet classifier depends on it.
@@ -201,7 +201,8 @@ struct packet_sock {
struct packet_type prot_hook;
spinlock_t bind_lock;
unsigned int running:1, /* prot_hook is attached*/
- auxdata:1;
+ auxdata:1,
+ origdev:1;
int ifindex; /* bound device */
__be16 num;
#ifdef CONFIG_PACKET_MULTICAST
@@ -284,7 +285,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
* Incoming packets have ll header pulled,
* push it back.
*
- * For outgoing ones skb->data == skb->mac.raw
+ * For outgoing ones skb->data == skb_mac_header(skb)
* so that this procedure is noop.
*/
@@ -303,7 +304,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
spkt = &PACKET_SKB_CB(skb)->sa.pkt;
- skb_push(skb, skb->data-skb->mac.raw);
+ skb_push(skb, skb->data - skb_mac_header(skb));
/*
* The SOCK_PACKET socket receives _all_ frames.
@@ -401,14 +402,14 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
* notable one here. This should really be fixed at the driver level.
*/
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* Try to align data part correctly */
if (dev->hard_header) {
skb->data -= dev->hard_header_len;
skb->tail -= dev->hard_header_len;
if (len < dev->hard_header_len)
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
}
/* Returns -EFAULT on error */
@@ -488,10 +489,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
never delivered to user.
*/
if (sk->sk_type != SOCK_DGRAM)
- skb_push(skb, skb->data - skb->mac.raw);
+ skb_push(skb, skb->data - skb_mac_header(skb));
else if (skb->pkt_type == PACKET_OUTGOING) {
/* Special case: outgoing packets have ll header at head */
- skb_pull(skb, skb->nh.raw - skb->data);
+ skb_pull(skb, skb_network_offset(skb));
}
}
@@ -528,7 +529,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- sll->sll_ifindex = dev->ifindex;
+ if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
+ sll->sll_ifindex = orig_dev->ifindex;
+ else
+ sll->sll_ifindex = dev->ifindex;
sll->sll_halen = 0;
if (dev->hard_header_parse)
@@ -582,6 +586,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
unsigned short macoff, netoff;
struct sk_buff *copy_skb = NULL;
+ struct timeval tv;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
@@ -591,10 +596,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
if (dev->hard_header) {
if (sk->sk_type != SOCK_DGRAM)
- skb_push(skb, skb->data - skb->mac.raw);
+ skb_push(skb, skb->data - skb_mac_header(skb));
else if (skb->pkt_type == PACKET_OUTGOING) {
/* Special case: outgoing packets have ll header at head */
- skb_pull(skb, skb->nh.raw - skb->data);
+ skb_pull(skb, skb_network_offset(skb));
}
}
@@ -612,7 +617,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
if (sk->sk_type == SOCK_DGRAM) {
macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
} else {
- unsigned maclen = skb->nh.raw - skb->data;
+ unsigned maclen = skb_network_offset(skb);
netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
macoff = netoff - maclen;
}
@@ -656,12 +661,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
h->tp_snaplen = snaplen;
h->tp_mac = macoff;
h->tp_net = netoff;
- if (skb->tstamp.off_sec == 0) {
+ if (skb->tstamp.tv64 == 0) {
__net_timestamp(skb);
sock_enable_timestamp(sk);
}
- h->tp_sec = skb->tstamp.off_sec;
- h->tp_usec = skb->tstamp.off_usec;
+ tv = ktime_to_timeval(skb->tstamp);
+ h->tp_sec = tv.tv_sec;
+ h->tp_usec = tv.tv_usec;
sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
sll->sll_halen = 0;
@@ -671,7 +677,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- sll->sll_ifindex = dev->ifindex;
+ if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
+ sll->sll_ifindex = orig_dev->ifindex;
+ else
+ sll->sll_ifindex = dev->ifindex;
h->tp_status = status;
smp_mb();
@@ -766,14 +775,14 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out_unlock;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
if (dev->hard_header) {
int res;
err = -EINVAL;
res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
if (sock->type != SOCK_DGRAM) {
- skb->tail = skb->data;
+ skb_reset_tail_pointer(skb);
skb->len = 0;
} else if (res < 0)
goto out_free;
@@ -1143,7 +1152,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
aux.tp_len = PACKET_SKB_CB(skb)->origlen;
aux.tp_snaplen = skb->len;
aux.tp_mac = 0;
- aux.tp_net = skb->nh.raw - skb->data;
+ aux.tp_net = skb_network_offset(skb);
put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
}
@@ -1411,6 +1420,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
po->auxdata = !!val;
return 0;
}
+ case PACKET_ORIGDEV:
+ {
+ int val;
+
+ if (optlen < sizeof(val))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ po->origdev = !!val;
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
@@ -1454,6 +1475,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
data = &val;
break;
+ case PACKET_ORIGDEV:
+ if (len > sizeof(int))
+ len = sizeof(int);
+ val = po->origdev;
+
+ data = &val;
+ break;
default:
return -ENOPROTOOPT;
}
@@ -1543,6 +1571,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
}
case SIOCGSTAMP:
return sock_get_timestamp(sk, (struct timeval __user *)arg);
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, (struct timespec __user *)arg);
#ifdef CONFIG_INET
case SIOCADDRT:
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
new file mode 100644
index 000000000000..8b31759ee8b0
--- /dev/null
+++ b/net/rfkill/Kconfig
@@ -0,0 +1,24 @@
+#
+# RF switch subsystem configuration
+#
+menuconfig RFKILL
+ tristate "RF switch subsystem support"
+ help
+ Say Y here if you want to have control over RF switches
+ found on many WiFi, Bluetooth and IRDA cards.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rfkill.
+
+config RFKILL_INPUT
+ tristate "Input layer to RF switch connector"
+ depends on RFKILL && INPUT
+ help
+ Say Y here if you want the kernel to automatically toggle the
+ state of RF switches on and off when the user presses the
+ appropriate button or key on the keyboard. Without this module
+ you need some kind of userspace application to control the
+ state of the switches.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rfkill-input.
diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile
new file mode 100644
index 000000000000..b38c430be057
--- /dev/null
+++ b/net/rfkill/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the RF switch subsystem.
+#
+
+obj-$(CONFIG_RFKILL) += rfkill.o
+obj-$(CONFIG_RFKILL_INPUT) += rfkill-input.o
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
new file mode 100644
index 000000000000..e5c840c30284
--- /dev/null
+++ b/net/rfkill/rfkill-input.c
@@ -0,0 +1,174 @@
+/*
+ * Input layer to RF Kill interface connector
+ *
+ * Copyright (c) 2007 Dmitry Torokhov
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/init.h>
+#include <linux/rfkill.h>
+
+MODULE_AUTHOR("Dmitry Torokhov <dtor@mail.ru>");
+MODULE_DESCRIPTION("Input layer to RF switch connector");
+MODULE_LICENSE("GPL");
+
+/*
+ * Toggle bookkeeping for one switch type. Instances are created with
+ * DEFINE_RFKILL_TASK(); rfkill_schedule_toggle() updates the desired
+ * state and rfkill_task_handler() applies it from the workqueue.
+ */
+struct rfkill_task {
+ struct work_struct work;
+ enum rfkill_type type;
+ struct mutex mutex; /* ensures that task is serialized */
+ spinlock_t lock; /* for accessing last and desired state */
+ unsigned long last; /* last schedule (jiffies of the last accepted toggle) */
+ enum rfkill_state desired_state; /* on/off */
+ enum rfkill_state current_state; /* on/off */
+};
+
+/*
+ * Work callback: bring every switch of task->type to the most recently
+ * requested state, unless that state has already been applied.
+ */
+static void rfkill_task_handler(struct work_struct *work)
+{
+	struct rfkill_task *task = container_of(work, struct rfkill_task, work);
+	enum rfkill_state wanted;
+
+	mutex_lock(&task->mutex);
+
+	/*
+	 * Read desired_state exactly once so the comparison and the
+	 * switch below act on the same value even if
+	 * rfkill_schedule_toggle() changes it concurrently.
+	 */
+	wanted = task->desired_state;
+	if (wanted == task->current_state)
+		goto unlock;
+
+	rfkill_switch_all(task->type, wanted);
+	task->current_state = wanted;
+
+unlock:
+	mutex_unlock(&task->mutex);
+}
+
+/*
+ * Flip the desired state of @task and queue the work item that applies
+ * it. Requests arriving within 200 ms of the previously accepted one
+ * are ignored.
+ */
+static void rfkill_schedule_toggle(struct rfkill_task *task)
+{
+	/* spin_lock_irqsave() stores the saved IRQ state in an unsigned long */
+	unsigned long flags;
+
+	spin_lock_irqsave(&task->lock, flags);
+
+	if (time_after(jiffies, task->last + msecs_to_jiffies(200))) {
+		task->desired_state = !task->desired_state;
+		task->last = jiffies;
+		schedule_work(&task->work);
+	}
+
+	spin_unlock_irqrestore(&task->lock, flags);
+}
+
+/*
+ * Define and statically initialise a struct rfkill_task named @n for
+ * switch type @t. Both desired and current state start as
+ * RFKILL_STATE_ON, so the first accepted toggle requests the opposite
+ * state.
+ */
+#define DEFINE_RFKILL_TASK(n, t) \
+ struct rfkill_task n = { \
+ .work = __WORK_INITIALIZER(n.work, \
+ rfkill_task_handler), \
+ .type = t, \
+ .mutex = __MUTEX_INITIALIZER(n.mutex), \
+ .lock = __SPIN_LOCK_UNLOCKED(n.lock), \
+ .desired_state = RFKILL_STATE_ON, \
+ .current_state = RFKILL_STATE_ON, \
+ }
+
+/* One task per key handled by rfkill_event(). */
+static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN);
+static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH);
+
+/*
+ * Input event callback. Only EV_KEY events with value 1 are acted on:
+ * KEY_WLAN and KEY_BLUETOOTH each schedule a toggle of the matching
+ * task, every other key code is ignored.
+ */
+static void rfkill_event(struct input_handle *handle, unsigned int type,
+			 unsigned int code, int down)
+{
+	if (type != EV_KEY || down != 1)
+		return;
+
+	switch (code) {
+	case KEY_WLAN:
+		rfkill_schedule_toggle(&rfkill_wlan);
+		break;
+	case KEY_BLUETOOTH:
+		rfkill_schedule_toggle(&rfkill_bt);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Attach this handler to @dev: allocate a handle, register it and open
+ * the device. Returns 0 on success; on failure everything done so far
+ * is undone and the error code is returned (-ENOMEM if the handle
+ * cannot be allocated).
+ */
+static int rfkill_connect(struct input_handler *handler, struct input_dev *dev,
+			  const struct input_device_id *id)
+{
+	struct input_handle *handle;
+	int error;
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (handle == NULL)
+		return -ENOMEM;
+
+	handle->dev = dev;
+	handle->handler = handler;
+	handle->name = "rfkill";
+
+	error = input_register_handle(handle);
+	if (!error) {
+		error = input_open_device(handle);
+		if (!error)
+			return 0;
+
+		/* open failed: take the registration back */
+		input_unregister_handle(handle);
+	}
+
+	kfree(handle);
+	return error;
+}
+
+/*
+ * Undo rfkill_connect(): close the device, unregister the handle and
+ * free it, in the reverse order of setup.
+ */
+static void rfkill_disconnect(struct input_handle *handle)
+{
+ input_close_device(handle);
+ input_unregister_handle(handle);
+ kfree(handle);
+}
+
+/*
+ * Device match table used by rfkill_handler. The first entry selects
+ * on EV_KEY plus KEY_WLAN, the second on EV_KEY plus KEY_BLUETOOTH;
+ * the trailing empty entry ends the table.
+ */
+static const struct input_device_id rfkill_ids[] = {
+ {
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+ .evbit = { BIT(EV_KEY) },
+ .keybit = { [LONG(KEY_WLAN)] = BIT(KEY_WLAN) },
+ },
+ {
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+ .evbit = { BIT(EV_KEY) },
+ .keybit = { [LONG(KEY_BLUETOOTH)] = BIT(KEY_BLUETOOTH) },
+ },
+ { }
+};
+
+/*
+ * Input handler descriptor: ties the event/connect/disconnect callbacks
+ * to the rfkill_ids match table. Registered in rfkill_handler_init().
+ */
+static struct input_handler rfkill_handler = {
+ .event = rfkill_event,
+ .connect = rfkill_connect,
+ .disconnect = rfkill_disconnect,
+ .name = "rfkill",
+ .id_table = rfkill_ids,
+};
+
+/* Module init: register the input handler and report the result. */
+static int __init rfkill_handler_init(void)
+{
+	int error;
+
+	error = input_register_handler(&rfkill_handler);
+
+	return error;
+}
+
+/*
+ * Module exit: unregister the handler first so no new toggles get
+ * scheduled, then flush the shared workqueue.
+ */
+static void __exit rfkill_handler_exit(void)
+{
+ input_unregister_handler(&rfkill_handler);
+ /* presumably ensures no rfkill_task_handler() work is still pending */
+ flush_scheduled_work();
+}
+
+module_init(rfkill_handler_init);
+module_exit(rfkill_handler_exit);
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
new file mode 100644
index 000000000000..a973603e3880
--- /dev/null
+++ b/net/rfkill/rfkill.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2006 Ivo van Doorn
+ * Copyright (C) 2007 Dmitry Torokhov
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/workqueue.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rfkill.h>
+
+MODULE_AUTHOR("Ivo van Doorn <IvDoorn@gmail.com>");
+MODULE_VERSION("1.0");
+MODULE_DESCRIPTION("RF switch support");
+MODULE_LICENSE("GPL");
+
+static LIST_HEAD(rfkill_list); /* list of registered rf switches */
+static DEFINE_MUTEX(rfkill_mutex);
+
+static enum rfkill_state rfkill_states[RFKILL_TYPE_MAX];
+
+/*
+ * Put one switch into @state under its own mutex. The driver callback
+ * is invoked only when the recorded state differs, and the recorded
+ * state is updated only when the callback reports success.
+ *
+ * Returns 0 on success or when nothing had to change, otherwise the
+ * error from mutex_lock_interruptible() or from the driver callback.
+ */
+static int rfkill_toggle_radio(struct rfkill *rfkill,
+			       enum rfkill_state state)
+{
+	int retval = mutex_lock_interruptible(&rfkill->mutex);
+
+	if (retval)
+		return retval;
+
+	/* retval is 0 here, which is the result for "already in @state" */
+	if (rfkill->state == state)
+		goto unlock;
+
+	retval = rfkill->toggle_radio(rfkill->data, state);
+	if (retval == 0)
+		rfkill->state = state;
+
+unlock:
+	mutex_unlock(&rfkill->mutex);
+	return retval;
+}
+
+/**
+ * rfkill_switch_all - Toggle state of all switches of given type
+ * @type: type of interfaces to be affected
+ * @state: the new state
+ *
+ * This function toggles state of all switches of given type unless
+ * a specific switch is claimed by userspace in which case it is
+ * left alone. The requested state is also remembered per type so
+ * that switches registered or un-claimed later pick it up.
+ */
+
+void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
+{
+	struct rfkill *rfkill;
+
+	mutex_lock(&rfkill_mutex);
+
+	rfkill_states[type] = state;
+
+	list_for_each_entry(rfkill, &rfkill_list, node) {
+		/* switches of other types must not be touched */
+		if (rfkill->type != type)
+			continue;
+		if (!rfkill->user_claim)
+			rfkill_toggle_radio(rfkill, state);
+	}
+
+	mutex_unlock(&rfkill_mutex);
+}
+EXPORT_SYMBOL(rfkill_switch_all);
+
+/* sysfs "name": print the switch name followed by a newline. */
+static ssize_t rfkill_name_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	return sprintf(buf, "%s\n", to_rfkill(dev)->name);
+}
+
+/*
+ * sysfs "type": print "wlan", "bluetooth" or "irda" for the switch
+ * type. Any other type value is treated as a kernel bug.
+ */
+static ssize_t rfkill_type_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	const char *type;
+
+	if (rfkill->type == RFKILL_TYPE_WLAN)
+		type = "wlan";
+	else if (rfkill->type == RFKILL_TYPE_BLUETOOTH)
+		type = "bluetooth";
+	else if (rfkill->type == RFKILL_TYPE_IRDA)
+		type = "irda";
+	else
+		BUG();
+
+	return sprintf(buf, "%s\n", type);
+}
+
+/* sysfs "state": print the recorded switch state as a decimal number. */
+static ssize_t rfkill_state_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	return sprintf(buf, "%d\n", to_rfkill(dev)->state);
+}
+
+/*
+ * sysfs "state" write: any non-zero number turns the radio on, zero
+ * turns it off. Requires CAP_NET_ADMIN. Returns @count on success,
+ * -EPERM without the capability, or the error from
+ * rfkill_toggle_radio().
+ */
+static ssize_t rfkill_state_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	enum rfkill_state wanted;
+	int error;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (simple_strtoul(buf, NULL, 0))
+		wanted = RFKILL_STATE_ON;
+	else
+		wanted = RFKILL_STATE_OFF;
+
+	error = rfkill_toggle_radio(rfkill, wanted);
+	if (error)
+		return error;
+
+	return count;
+}
+
+/*
+ * sysfs "claim": print the user_claim flag as a decimal number (no
+ * trailing newline).
+ */
+static ssize_t rfkill_claim_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	return sprintf(buf, "%d", to_rfkill(dev)->user_claim);
+}
+
+/*
+ * sysfs "claim" write: a non-zero number claims the switch for
+ * userspace, zero releases it. On release the switch is first brought
+ * back to the state remembered for its type. Requires CAP_NET_ADMIN.
+ * Returns @count on success, -EPERM without the capability, or the
+ * error from mutex_lock_interruptible().
+ */
+static ssize_t rfkill_claim_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	bool claim;
+	int error;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	claim = !!simple_strtoul(buf, NULL, 0);
+
+	/*
+	 * Hold the global lock so that rfkill_switch_all() cannot be
+	 * running while the claim changes.
+	 */
+	error = mutex_lock_interruptible(&rfkill_mutex);
+	if (error)
+		return error;
+
+	if (rfkill->user_claim == claim)
+		goto unlock;
+
+	if (!claim)
+		rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type]);
+	rfkill->user_claim = claim;
+
+unlock:
+	mutex_unlock(&rfkill_mutex);
+	return count;
+}
+
+/*
+ * sysfs attributes created for every rfkill device. "state" and
+ * "claim" have store methods and therefore need owner write
+ * permission; the store methods additionally check CAP_NET_ADMIN.
+ */
+static struct device_attribute rfkill_dev_attrs[] = {
+	__ATTR(name, S_IRUGO, rfkill_name_show, NULL),
+	__ATTR(type, S_IRUGO, rfkill_type_show, NULL),
+	__ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
+	__ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
+	__ATTR_NULL
+};
+
+/*
+ * Class release hook: free the rfkill structure embedding @dev and
+ * drop the module reference taken in rfkill_allocate().
+ */
+static void rfkill_release(struct device *dev)
+{
+	kfree(to_rfkill(dev));
+	module_put(THIS_MODULE);
+}
+
+#ifdef CONFIG_PM
+/*
+ * Class suspend hook. On PM_EVENT_SUSPEND a radio that is recorded as
+ * on is switched off through the driver callback; rfkill->state itself
+ * is not modified. Always returns 0.
+ */
+static int rfkill_suspend(struct device *dev, pm_message_t state)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+
+	/* already in the requested power state: nothing to do */
+	if (dev->power.power_state.event == state.event)
+		return 0;
+
+	if (state.event == PM_EVENT_SUSPEND) {
+		mutex_lock(&rfkill->mutex);
+		if (rfkill->state == RFKILL_STATE_ON)
+			rfkill->toggle_radio(rfkill->data, RFKILL_STATE_OFF);
+		mutex_unlock(&rfkill->mutex);
+	}
+
+	dev->power.power_state = state;
+	return 0;
+}
+
+/*
+ * Class resume hook. Unless the device is already marked as on, a
+ * radio whose recorded state is on is switched back on through the
+ * driver callback. The power state is then set to PMSG_ON. Always
+ * returns 0.
+ */
+static int rfkill_resume(struct device *dev)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+
+	if (dev->power.power_state.event == PM_EVENT_ON)
+		goto done;
+
+	mutex_lock(&rfkill->mutex);
+	if (rfkill->state == RFKILL_STATE_ON)
+		rfkill->toggle_radio(rfkill->data, RFKILL_STATE_ON);
+	mutex_unlock(&rfkill->mutex);
+
+done:
+	dev->power.power_state = PMSG_ON;
+	return 0;
+}
+#else
+#define rfkill_suspend NULL
+#define rfkill_resume NULL
+#endif
+
+/*
+ * Device class for rf switches: supplies the sysfs attributes, the
+ * release hook and the suspend/resume hooks (both NULL when CONFIG_PM
+ * is not set).
+ */
+static struct class rfkill_class = {
+ .name = "rfkill",
+ .dev_release = rfkill_release,
+ .dev_attrs = rfkill_dev_attrs,
+ .suspend = rfkill_suspend,
+ .resume = rfkill_resume,
+};
+
+/*
+ * Bring a new switch to the state remembered for its type and, if that
+ * succeeds, append it to rfkill_list. Runs under rfkill_mutex.
+ * Returns 0 on success or the error from mutex_lock_interruptible() /
+ * rfkill_toggle_radio().
+ */
+static int rfkill_add_switch(struct rfkill *rfkill)
+{
+	int retval = mutex_lock_interruptible(&rfkill_mutex);
+
+	if (retval)
+		return retval;
+
+	retval = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type]);
+	if (!retval)
+		list_add_tail(&rfkill->node, &rfkill_list);
+
+	mutex_unlock(&rfkill_mutex);
+	return retval;
+}
+
+/*
+ * Unlink the switch from rfkill_list and ask the driver to turn the
+ * radio off, all under rfkill_mutex. The result of
+ * rfkill_toggle_radio() is not checked.
+ */
+static void rfkill_remove_switch(struct rfkill *rfkill)
+{
+ mutex_lock(&rfkill_mutex);
+ list_del_init(&rfkill->node);
+ rfkill_toggle_radio(rfkill, RFKILL_STATE_OFF);
+ mutex_unlock(&rfkill_mutex);
+}
+
+/**
+ * rfkill_allocate - allocate memory for rfkill structure.
+ * @parent: device that has rf switch on it
+ * @type: type of the switch (wlan, bluetooth, irda)
+ *
+ * This function should be called by the network driver when it needs
+ * rfkill structure. Once the structure is allocated the driver should
+ * finish its initialization by setting name, private data and the
+ * toggle_radio method and then register it with rfkill_register().
+ * NOTE: If registration fails the structure should be freed by calling
+ * rfkill_free() otherwise rfkill_unregister() should be used.
+ *
+ * Returns the new structure, or NULL if the allocation failed.
+ */
+struct rfkill *rfkill_allocate(struct device *parent, enum rfkill_type type)
+{
+	struct rfkill *rfkill;
+	struct device *dev;
+
+	rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL);
+	if (!rfkill)
+		return NULL;
+
+	mutex_init(&rfkill->mutex);
+	INIT_LIST_HEAD(&rfkill->node);
+	rfkill->type = type;
+
+	dev = &rfkill->dev;
+	dev->class = &rfkill_class;
+	dev->parent = parent;
+	device_initialize(dev);
+
+	/* dropped again in rfkill_release() */
+	__module_get(THIS_MODULE);
+
+	return rfkill;
+}
+EXPORT_SYMBOL(rfkill_allocate);
+
+/**
+ * rfkill_free - Mark rfkill structure for deletion
+ * @rfkill: rfkill structure to be destroyed (may be NULL)
+ *
+ * Drops a reference on the embedded device so that the structure gets
+ * destroyed. A NULL @rfkill is ignored.
+ * Note that rfkill_free() should _not_ be called after rfkill_unregister().
+ */
+void rfkill_free(struct rfkill *rfkill)
+{
+	if (!rfkill)
+		return;
+
+	put_device(&rfkill->dev);
+}
+EXPORT_SYMBOL(rfkill_free);
+
+/**
+ * rfkill_register - Register a rfkill structure.
+ * @rfkill: rfkill structure to be registered
+ *
+ * To be called by the network driver once the structure is fully set
+ * up. The driver must be able to service toggle_radio calls from the
+ * moment this function is entered.
+ *
+ * Returns 0 on success, -EINVAL if no toggle_radio method is set, or
+ * the error from adding the switch or the device.
+ */
+int rfkill_register(struct rfkill *rfkill)
+{
+	static atomic_t rfkill_no = ATOMIC_INIT(0);
+	struct device *dev = &rfkill->dev;
+	long index;
+	int error;
+
+	if (rfkill->toggle_radio == NULL)
+		return -EINVAL;
+
+	error = rfkill_add_switch(rfkill);
+	if (error)
+		return error;
+
+	/* device names are rfkill0, rfkill1, ... in registration order */
+	index = (long)atomic_inc_return(&rfkill_no) - 1;
+	snprintf(dev->bus_id, sizeof(dev->bus_id), "rfkill%ld", index);
+
+	error = device_add(dev);
+	if (error)
+		rfkill_remove_switch(rfkill);
+
+	return error;
+}
+EXPORT_SYMBOL(rfkill_register);
+
+/**
+ * rfkill_unregister - Unregister a rfkill structure.
+ * @rfkill: rfkill structure to be unregistered
+ *
+ * This function should be called by the network driver during device
+ * teardown to destroy rfkill structure. Note that rfkill_free() should
+ * _not_ be called after rfkill_unregister().
+ */
+void rfkill_unregister(struct rfkill *rfkill)
+{
+ device_del(&rfkill->dev);
+ rfkill_remove_switch(rfkill);
+ /* drops a device reference, which is why rfkill_free() must not follow */
+ put_device(&rfkill->dev);
+}
+EXPORT_SYMBOL(rfkill_unregister);
+
+/*
+ * Rfkill module initialization/deinitialization.
+ */
+
+/*
+ * Module init: default every switch type to "on" and register the
+ * rfkill device class. Returns 0 or the class_register() error.
+ */
+static int __init rfkill_init(void)
+{
+	int error;
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(rfkill_states); idx++)
+		rfkill_states[idx] = RFKILL_STATE_ON;
+
+	error = class_register(&rfkill_class);
+	if (error)
+		printk(KERN_ERR "rfkill: unable to register rfkill class\n");
+
+	return error;
+}
+
+/* Module exit: unregister the class registered by rfkill_init(). */
+static void __exit rfkill_exit(void)
+{
+ class_unregister(&rfkill_class);
+}
+
+module_init(rfkill_init);
+module_exit(rfkill_exit);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 8c34f1ca6c8c..d476c43d5216 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -700,23 +700,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
unsigned char cause, diagnostic;
struct net_device *dev;
ax25_uid_assoc *user;
- int n;
-
- if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
- sock->state = SS_CONNECTED;
- return 0; /* Connect completed during a ERESTARTSYS event */
- }
-
- if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
- sock->state = SS_UNCONNECTED;
- return -ECONNREFUSED;
- }
-
- if (sk->sk_state == TCP_ESTABLISHED)
- return -EISCONN; /* No reconnect on a seqpacket socket */
-
- sk->sk_state = TCP_CLOSE;
- sock->state = SS_UNCONNECTED;
+ int n, err = 0;
if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose))
return -EINVAL;
@@ -734,24 +718,53 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
if ((rose->source_ndigis + addr->srose_ndigis) > ROSE_MAX_DIGIS)
return -EINVAL;
+ lock_sock(sk);
+
+ if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
+ /* Connect completed during a ERESTARTSYS event */
+ sock->state = SS_CONNECTED;
+ goto out_release;
+ }
+
+ if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
+ sock->state = SS_UNCONNECTED;
+ err = -ECONNREFUSED;
+ goto out_release;
+ }
+
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* No reconnect on a seqpacket socket */
+ err = -EISCONN;
+ goto out_release;
+ }
+
+ sk->sk_state = TCP_CLOSE;
+ sock->state = SS_UNCONNECTED;
+
rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause,
&diagnostic);
-	if (!rose->neighbour)
-		return -ENETUNREACH;
+	if (!rose->neighbour) {
+		/* the socket is locked by now: leave through out_release */
+		err = -ENETUNREACH;
+		goto out_release;
+	}
rose->lci = rose_new_lci(rose->neighbour);
- if (!rose->lci)
- return -ENETUNREACH;
+ if (!rose->lci) {
+ err = -ENETUNREACH;
+ goto out_release;
+ }
if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */
sock_reset_flag(sk, SOCK_ZAPPED);
- if ((dev = rose_dev_first()) == NULL)
- return -ENETUNREACH;
+ if ((dev = rose_dev_first()) == NULL) {
+ err = -ENETUNREACH;
+ goto out_release;
+ }
user = ax25_findbyuid(current->euid);
- if (!user)
- return -EINVAL;
+ if (!user) {
+ err = -EINVAL;
+ goto out_release;
+ }
memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN);
rose->source_call = user->call;
@@ -789,32 +802,36 @@ rose_try_next_neigh:
rose_start_t1timer(sk);
/* Now the loop */
- if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
- return -EINPROGRESS;
+ if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
+ err = -EINPROGRESS;
+ goto out_release;
+ }
/*
* A Connect Ack with Choke or timeout or failed routing will go to
* closed.
*/
if (sk->sk_state == TCP_SYN_SENT) {
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
+ DEFINE_WAIT(wait);
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk->sk_sleep, &wait,
+ TASK_INTERRUPTIBLE);
if (sk->sk_state != TCP_SYN_SENT)
break;
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
+ lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+
+ if (err)
+ goto out_release;
}
if (sk->sk_state != TCP_ESTABLISHED) {
@@ -822,22 +839,26 @@ rose_try_next_neigh:
rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic);
if (rose->neighbour)
goto rose_try_next_neigh;
- /* No more neighbour */
+
+ /* No more neighbours */
sock->state = SS_UNCONNECTED;
- return sock_error(sk); /* Always set at this point */
+ err = sock_error(sk); /* Always set at this point */
+ goto out_release;
}
sock->state = SS_CONNECTED;
- return 0;
+out_release:
+ release_sock(sk);
+
+ return err;
}
static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
struct sk_buff *skb;
struct sock *newsk;
+ DEFINE_WAIT(wait);
struct sock *sk;
int err = 0;
@@ -847,40 +868,41 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
lock_sock(sk);
if (sk->sk_type != SOCK_SEQPACKET) {
err = -EOPNOTSUPP;
- goto out;
+ goto out_release;
}
if (sk->sk_state != TCP_LISTEN) {
err = -EINVAL;
- goto out;
+ goto out_release;
}
/*
* The write queue this time is holding sockets ready to use
* hooked into the SABM we saved
*/
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
+ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb)
break;
- current->state = TASK_INTERRUPTIBLE;
- release_sock(sk);
if (flags & O_NONBLOCK) {
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -EWOULDBLOCK;
+ err = -EWOULDBLOCK;
+ break;
}
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+ if (err)
+ goto out_release;
newsk = skb->sk;
newsk->sk_socket = newsock;
@@ -892,7 +914,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
sk->sk_ack_backlog--;
newsock->sk = newsk;
-out:
+out_release:
release_sock(sk);
return err;
@@ -1081,9 +1103,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
*/
SOCK_DEBUG(sk, "ROSE: Appending user data\n");
- asmptr = skb->h.raw = skb_put(skb, len);
+ skb_reset_transport_header(skb);
+ skb_put(skb, len);
- err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (err) {
kfree_skb(skb);
return err;
@@ -1131,7 +1154,7 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
int lg;
/* Save a copy of the Header */
- memcpy(header, skb->data, ROSE_MIN_LEN);
+ skb_copy_from_linear_data(skb, header, ROSE_MIN_LEN);
skb_pull(skb, ROSE_MIN_LEN);
frontlen = skb_headroom(skb);
@@ -1151,12 +1174,12 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN;
/* Copy the user data */
- memcpy(skb_put(skbn, lg), skb->data, lg);
+ skb_copy_from_linear_data(skb, skb_put(skbn, lg), lg);
skb_pull(skb, lg);
/* Duplicate the Header */
skb_push(skbn, ROSE_MIN_LEN);
- memcpy(skbn->data, header, ROSE_MIN_LEN);
+ skb_copy_to_linear_data(skbn, header, ROSE_MIN_LEN);
if (skb->len > 0)
skbn->data[2] |= M_BIT;
@@ -1210,7 +1233,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
*asmptr = qbit;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
copied = skb->len;
if (copied > size) {
@@ -1272,6 +1295,9 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP:
return sock_get_timestamp(sk, (struct timeval __user *) argp);
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, (struct timespec __user *) argp);
+
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 3e41bd93ab9f..cd01642f0491 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -77,7 +77,7 @@ static void rose_loopback_timer(unsigned long param)
dest = (rose_address *)(skb->data + 4);
lci_o = 0xFFF - lci_i;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
sk = rose_find_socket(lci_o, &rose_loopback_neigh);
if (sk) {
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index a1233e1b1ab6..929a784a86d7 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -596,7 +596,7 @@ struct net_device *rose_dev_first(void)
struct net_device *dev, *first = NULL;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE)
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
@@ -614,12 +614,13 @@ struct net_device *rose_dev_get(rose_address *addr)
struct net_device *dev;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) {
dev_hold(dev);
goto out;
}
}
+ dev = NULL;
out:
read_unlock(&dev_base_lock);
return dev;
@@ -630,10 +631,11 @@ static int rose_dev_exists(rose_address *addr)
struct net_device *dev;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0)
goto out;
}
+ dev = NULL;
out:
read_unlock(&dev_base_lock);
return dev != NULL;
@@ -906,7 +908,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
}
}
else {
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
res = rose_process_rx_frame(sk, skb);
goto out;
}
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
new file mode 100644
index 000000000000..91b3d52f6f1a
--- /dev/null
+++ b/net/rxrpc/Kconfig
@@ -0,0 +1,43 @@
+#
+# RxRPC session sockets
+#
+
+config AF_RXRPC
+ tristate "RxRPC session sockets"
+ depends on EXPERIMENTAL
+ select KEYS
+ help
+ Say Y or M here to include support for RxRPC session sockets (just
+ the transport part, not the presentation part: (un)marshalling is
+ left to the application).
+
+ These are used for AFS kernel filesystem and userspace utilities.
+
+ This module at the moment only supports client operations and is
+ currently incomplete.
+
+ See Documentation/networking/rxrpc.txt.
+
+
+config AF_RXRPC_DEBUG
+ bool "RxRPC dynamic debugging"
+ depends on AF_RXRPC
+ help
+ Say Y here to make runtime controllable debugging messages appear.
+
+ See Documentation/networking/rxrpc.txt.
+
+
+config RXKAD
+ tristate "RxRPC Kerberos security"
+ depends on AF_RXRPC
+ select CRYPTO
+ select CRYPTO_MANAGER
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_PCBC
+ select CRYPTO_FCRYPT
+ help
+ Provide Kerberos 4 and AFS kaserver security handling for AF_RXRPC
+ through the use of the key retention service.
+
+ See Documentation/networking/rxrpc.txt.
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6efcb6f162a0..c46867c61c98 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -1,25 +1,29 @@
#
-# Makefile for Linux kernel Rx RPC
+# Makefile for Linux kernel RxRPC
#
-#CFLAGS += -finstrument-functions
-
-rxrpc-objs := \
- call.o \
- connection.o \
- krxiod.o \
- krxsecd.o \
- krxtimod.o \
- main.o \
- peer.o \
- rxrpc_syms.o \
- transport.o
+af-rxrpc-objs := \
+ af_rxrpc.o \
+ ar-accept.o \
+ ar-ack.o \
+ ar-call.o \
+ ar-connection.o \
+ ar-connevent.o \
+ ar-error.o \
+ ar-input.o \
+ ar-key.o \
+ ar-local.o \
+ ar-output.o \
+ ar-peer.o \
+ ar-recvmsg.o \
+ ar-security.o \
+ ar-skbuff.o \
+ ar-transport.o
ifeq ($(CONFIG_PROC_FS),y)
-rxrpc-objs += proc.o
-endif
-ifeq ($(CONFIG_SYSCTL),y)
-rxrpc-objs += sysctl.o
+af-rxrpc-objs += ar-proc.o
endif
-obj-$(CONFIG_RXRPC) := rxrpc.o
+obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
+
+obj-$(CONFIG_RXKAD) += rxkad.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
new file mode 100644
index 000000000000..2c57df9c131b
--- /dev/null
+++ b/net/rxrpc/af_rxrpc.c
@@ -0,0 +1,879 @@
+/* AF_RXRPC implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+MODULE_DESCRIPTION("RxRPC network protocol");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_RXRPC);
+
+/* RxRPC debugging mask; exposed as the "debug" module parameter, so the
+ * description must be attached to that name, not to the variable's name */
+unsigned rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
+module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "RxRPC debugging mask");
+
+static int sysctl_rxrpc_max_qlen __read_mostly = 10; /* initial sk_max_ack_backlog given to each socket by rxrpc_create() */
+
+static struct proto rxrpc_proto; /* defined after the socket operations below */
+static const struct proto_ops rxrpc_rpc_ops; /* defined after the socket operations below */
+
+/* local epoch for detecting local-end reset
+ * - set from the current time in seconds by af_rxrpc_init() */
+__be32 rxrpc_epoch;
+
+/* current debugging ID */
+atomic_t rxrpc_debug_id;
+
+/* count of skbs currently in use
+ * - af_rxrpc_exit() asserts that this has returned to zero */
+atomic_t rxrpc_n_skbs;
+
+struct workqueue_struct *rxrpc_workqueue; /* "krxrpcd" work queue created by af_rxrpc_init() */
+
+static void rxrpc_sock_destructor(struct sock *); /* installed as sk_destruct by rxrpc_create() */
+
+/*
+ * see if an RxRPC socket is currently writable
+ * - the socket counts as writable whilst the write memory allocated against
+ *   it is below its send buffer size
+ */
+static inline int rxrpc_writable(struct sock *sk)
+{
+	size_t sndbuf = (size_t) sk->sk_sndbuf;
+
+	return atomic_read(&sk->sk_wmem_alloc) < sndbuf;
+}
+
+/*
+ * tell anyone waiting for write space that the socket has become writable
+ * - installed as the socket's sk_write_space callback by rxrpc_create()
+ * - does nothing unless rxrpc_writable() reports that there is room
+ */
+static void rxrpc_write_space(struct sock *sk)
+{
+	_enter("%p", sk);
+
+	read_lock(&sk->sk_callback_lock);
+	if (!rxrpc_writable(sk))
+		goto unlock;
+
+	/* wake sleepers on the socket's wait queue, then signal async users */
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible(sk->sk_sleep);
+	sk_wake_async(sk, 2, POLL_OUT);
+
+unlock:
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * validate an RxRPC address
+ * - @rx is the socket the address is to be used with; the transport family in
+ *   the address must match the one the socket was created for
+ * - @srx is the address to check and @len is the size of the buffer holding it
+ * - returns 0 if the address is acceptable, -EINVAL if the lengths are
+ *   inconsistent, -EAFNOSUPPORT if an address family is wrong or unsupported
+ *   and -ESOCKTNOSUPPORT if the transport type isn't SOCK_DGRAM
+ * - for AF_INET, any part of the transport address beyond its first 8 bytes
+ *   is cleared in place
+ */
+static int rxrpc_validate_address(struct rxrpc_sock *rx,
+				  struct sockaddr_rxrpc *srx,
+				  int len)
+{
+	/* len is signed, so test for a negative value explicitly rather than
+	 * letting the comparison with sizeof() convert it into a huge
+	 * unsigned number that would pass the check */
+	if (len < 0 || (size_t) len < sizeof(struct sockaddr_rxrpc))
+		return -EINVAL;
+
+	if (srx->srx_family != AF_RXRPC)
+		return -EAFNOSUPPORT;
+
+	if (srx->transport_type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	/* the transport address must hold at least a family field and must
+	 * fit into what's left of the buffer */
+	len -= offsetof(struct sockaddr_rxrpc, transport);
+	if (srx->transport_len < sizeof(sa_family_t) ||
+	    srx->transport_len > len)
+		return -EINVAL;
+
+	if (srx->transport.family != rx->proto)
+		return -EAFNOSUPPORT;
+
+	switch (srx->transport.family) {
+	case AF_INET:
+		_debug("INET: %x @ %u.%u.%u.%u",
+		       ntohs(srx->transport.sin.sin_port),
+		       NIPQUAD(srx->transport.sin.sin_addr));
+		/* NOTE(review): 8 is presumably the size of the family, port
+		 * and address fields of a sockaddr_in, making this a wipe of
+		 * the trailing padding - confirm */
+		if (srx->transport_len > 8)
+			memset((void *)&srx->transport + 8, 0,
+			       srx->transport_len - 8);
+		break;
+
+	case AF_INET6:
+	default:
+		return -EAFNOSUPPORT;
+	}
+
+	return 0;
+}
+
+/*
+ * bind a local address to an RxRPC socket
+ * - only permitted whilst the socket is in the RXRPC_UNCONNECTED state
+ * - a non-zero srx_service makes this a server socket and adds it to the local
+ *   endpoint's list of services; -EADDRINUSE is returned if a socket on that
+ *   list already provides the service
+ * - a zero srx_service just binds a client socket to the local endpoint
+ * - returns 0 on success or a negative error code
+ */
+static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) saddr;
+	struct sock *sk = sock->sk;
+	struct rxrpc_local *local;
+	struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
+	__be16 service_id;
+	int ret;
+
+	_enter("%p,%p,%d", rx, saddr, len);
+
+	ret = rxrpc_validate_address(rx, srx, len);
+	if (ret < 0)
+		goto error;
+
+	lock_sock(&rx->sk);
+
+	if (rx->sk.sk_state != RXRPC_UNCONNECTED) {
+		ret = -EINVAL;
+		goto error_unlock;
+	}
+
+	memcpy(&rx->srx, srx, sizeof(rx->srx));
+
+	/* find a local transport endpoint if we don't have one already */
+	local = rxrpc_lookup_local(&rx->srx);
+	if (IS_ERR(local)) {
+		ret = PTR_ERR(local);
+		goto error_unlock;
+	}
+
+	rx->local = local;
+	if (srx->srx_service) {
+		/* a service may only be offered by one socket per endpoint */
+		service_id = htons(srx->srx_service);
+		write_lock_bh(&local->services_lock);
+		list_for_each_entry(prx, &local->services, listen_link) {
+			if (prx->service_id == service_id)
+				goto service_in_use;
+		}
+
+		rx->service_id = service_id;
+		list_add_tail(&rx->listen_link, &local->services);
+		write_unlock_bh(&local->services_lock);
+
+		rx->sk.sk_state = RXRPC_SERVER_BOUND;
+	} else {
+		rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+	}
+
+	release_sock(&rx->sk);
+	_leave(" = 0");
+	return 0;
+
+service_in_use:
+	ret = -EADDRINUSE;
+	write_unlock_bh(&local->services_lock);
+	/* the socket stays RXRPC_UNCONNECTED, so it mustn't be left holding
+	 * the endpoint: a later connect() asserts that rx->local is NULL and
+	 * a later bind() would overwrite the pointer without releasing it */
+	rx->local = NULL;
+	rxrpc_put_local(local);
+error_unlock:
+	release_sock(&rx->sk);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * set the number of pending calls permitted on a listening socket
+ * - only a socket in the RXRPC_SERVER_BOUND state can be made to listen
+ * - returns 0 on success, -EADDRNOTAVAIL if the socket is still
+ *   RXRPC_UNCONNECTED and -EBUSY if it is in any other state
+ */
+static int rxrpc_listen(struct socket *sock, int backlog)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	struct sock *sk = sock->sk;
+	int ret;
+
+	_enter("%p,%d", rx, backlog);
+
+	lock_sock(&rx->sk);
+
+	if (rx->sk.sk_state == RXRPC_UNCONNECTED) {
+		ret = -EADDRNOTAVAIL;
+	} else if (rx->sk.sk_state == RXRPC_SERVER_BOUND) {
+		ASSERT(rx->local != NULL);
+		sk->sk_max_ack_backlog = backlog;
+		rx->sk.sk_state = RXRPC_SERVER_LISTENING;
+		ret = 0;
+	} else {
+		/* client sockets and sockets that are already listening */
+		ret = -EBUSY;
+	}
+
+	release_sock(&rx->sk);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * find a transport by address
+ * - gets the peer record for the address and then the transport joining the
+ *   socket's local endpoint to that peer
+ * - the socket must already have a local endpoint (this is asserted)
+ * - returns the transport, or an error pointer that callers test with IS_ERR()
+ */
+static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
+						       struct sockaddr *addr,
+						       int addr_len, int flags,
+						       gfp_t gfp)
+{
+	struct sockaddr_rxrpc *target = (struct sockaddr_rxrpc *) addr;
+	struct rxrpc_transport *transport;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	struct rxrpc_peer *remote;
+
+	_enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
+
+	ASSERT(rx->local != NULL);
+	ASSERT(rx->sk.sk_state > RXRPC_UNCONNECTED);
+
+	/* the target must be of the same sort as the socket's own address */
+	if (target->transport_type != rx->srx.transport_type)
+		return ERR_PTR(-ESOCKTNOSUPPORT);
+	if (target->transport.family != rx->srx.transport.family)
+		return ERR_PTR(-EAFNOSUPPORT);
+
+	/* find a remote transport endpoint from the local one */
+	remote = rxrpc_get_peer(target, gfp);
+	if (IS_ERR(remote))
+		return ERR_PTR(PTR_ERR(remote));
+
+	/* find a transport; the peer is put again whether or not that works */
+	transport = rxrpc_get_transport(rx->local, remote, gfp);
+	rxrpc_put_peer(remote);
+	_leave(" = %p", transport);
+	return transport;
+}
+
+/**
+ * rxrpc_kernel_begin_call - Allow a kernel service to begin a call
+ * @sock: The socket on which to make the call
+ * @srx: The address of the peer to contact (defaults to socket setting)
+ * @key: The security context to use (defaults to socket setting)
+ * @user_call_ID: The ID to use
+ * @gfp: The allocation constraints
+ *
+ * Allow a kernel service to begin a call on the nominated socket. This just
+ * sets up all the internal tracking structures and allocates connection and
+ * call IDs as appropriate. The call to be used is returned.
+ *
+ * The default socket destination address and security may be overridden by
+ * supplying @srx and @key.
+ *
+ * On failure an error pointer is returned instead of a call; -ENOTCONN is
+ * used if @srx is NULL and the socket has no transport of its own.
+ */
+struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
+					   struct sockaddr_rxrpc *srx,
+					   struct key *key,
+					   unsigned long user_call_ID,
+					   gfp_t gfp)
+{
+	struct rxrpc_conn_bundle *bundle;
+	struct rxrpc_transport *trans;
+	struct rxrpc_call *call;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	__be16 service_id;
+
+	_enter(",,%x,%lx", key_serial(key), user_call_ID);
+
+	lock_sock(&rx->sk);
+
+	if (srx) {
+		trans = rxrpc_name_to_transport(sock, (struct sockaddr *) srx,
+						sizeof(*srx), 0, gfp);
+		if (IS_ERR(trans)) {
+			call = ERR_PTR(PTR_ERR(trans));
+			trans = NULL;
+			goto out_notrans;
+		}
+	} else {
+		trans = rx->trans;
+		if (!trans) {
+			call = ERR_PTR(-ENOTCONN);
+			goto out_notrans;
+		}
+		/* take our own ref on the socket's transport to balance the
+		 * put at the end */
+		atomic_inc(&trans->usage);
+	}
+
+	service_id = rx->service_id;
+	if (srx)
+		service_id = htons(srx->srx_service);
+
+	if (!key)
+		key = rx->key;
+	if (key && !key->payload.data)
+		key = NULL; /* a no-security key */
+
+	bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp);
+	if (IS_ERR(bundle)) {
+		call = ERR_PTR(PTR_ERR(bundle));
+		goto out;
+	}
+
+	call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID, true,
+				     gfp);
+	rxrpc_put_bundle(trans, bundle);
+out:
+	rxrpc_put_transport(trans);
+	/* the paths that never obtained a transport join here so that
+	 * rxrpc_put_transport() is not handed a NULL pointer */
+out_notrans:
+	release_sock(&rx->sk);
+	_leave(" = %p", call);
+	return call;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_begin_call);
+
+/**
+ * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
+ * @call: The call to end
+ *
+ * Allow a kernel service to end a call it was using. The call must be
+ * complete before this is called (the call should be aborted if necessary).
+ *
+ * The call's user ID is removed from the socket recorded in the call and
+ * rxrpc_put_call() is then applied to the call.
+ */
+void rxrpc_kernel_end_call(struct rxrpc_call *call)
+{
+ _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+ rxrpc_remove_user_ID(call->socket, call);
+ rxrpc_put_call(call);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_end_call);
+
+/**
+ * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages
+ * @sock: The socket to intercept received messages on
+ * @interceptor: The function to pass the messages to
+ *
+ * Divert messages that would otherwise be placed on the Rx queue of an RxRPC
+ * socket to a function supplied by a kernel service.  @interceptor takes
+ * over responsibility for freeing the socket buffers it is handed.  It is
+ * called with the socket receive queue spinlock held and softirqs disabled so
+ * that messages are delivered in the right order.
+ */
+void rxrpc_kernel_intercept_rx_messages(struct socket *sock,
+					rxrpc_interceptor_t interceptor)
+{
+	_enter("");
+	rxrpc_sk(sock->sk)->interceptor = interceptor;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
+
+/*
+ * connect an RxRPC socket
+ * - this just targets it at a specific destination; no actual connection
+ * negotiation takes place
+ * - an unbound socket is first bound as a client to a local endpoint of the
+ * target's transport type and family
+ * - returns 0 on success, -EISCONN if the socket is already connected,
+ * -EBUSY if it is in a server state, or the error from address validation
+ * or from the endpoint/transport lookups
+ */
+static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
+ int addr_len, int flags)
+{
+ struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
+ struct sock *sk = sock->sk;
+ struct rxrpc_transport *trans;
+ struct rxrpc_local *local;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ int ret;
+
+ _enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
+
+ ret = rxrpc_validate_address(rx, srx, addr_len);
+ if (ret < 0) {
+ _leave(" = %d [bad addr]", ret);
+ return ret;
+ }
+
+ lock_sock(&rx->sk);
+
+ switch (rx->sk.sk_state) {
+ case RXRPC_UNCONNECTED:
+ /* find a local transport endpoint if we don't have one already */
+ ASSERTCMP(rx->local, ==, NULL);
+ rx->srx.srx_family = AF_RXRPC;
+ rx->srx.srx_service = 0; /* no service: this is a client socket */
+ rx->srx.transport_type = srx->transport_type;
+ rx->srx.transport_len = sizeof(sa_family_t); /* family only; no local address or port is given */
+ rx->srx.transport.family = srx->transport.family;
+ local = rxrpc_lookup_local(&rx->srx);
+ if (IS_ERR(local)) {
+ release_sock(&rx->sk);
+ return PTR_ERR(local);
+ }
+ rx->local = local;
+ rx->sk.sk_state = RXRPC_CLIENT_BOUND; /* deliberate fall through: now bound */
+ case RXRPC_CLIENT_BOUND:
+ break;
+ case RXRPC_CLIENT_CONNECTED:
+ release_sock(&rx->sk);
+ return -EISCONN;
+ default:
+ release_sock(&rx->sk);
+ return -EBUSY; /* server sockets can't connect as well */
+ }
+
+ trans = rxrpc_name_to_transport(sock, addr, addr_len, flags,
+ GFP_KERNEL);
+ if (IS_ERR(trans)) {
+ release_sock(&rx->sk); /* the socket is left in the RXRPC_CLIENT_BOUND state */
+ _leave(" = %ld", PTR_ERR(trans));
+ return PTR_ERR(trans);
+ }
+
+ rx->trans = trans; /* the socket keeps the transport until rxrpc_release_sock() puts it */
+ rx->service_id = htons(srx->srx_service);
+ rx->sk.sk_state = RXRPC_CLIENT_CONNECTED;
+
+ release_sock(&rx->sk);
+ return 0;
+}
+
+/*
+ * send a message through an RxRPC socket
+ * - in a client this does a number of things:
+ *   - finds/sets up a connection for the security specified (if any)
+ *   - initiates a call (ID in control data)
+ *   - ends the request phase of a call (if MSG_MORE is not set)
+ *   - sends a call data packet
+ *   - may send an abort (abort code in control data)
+ * - if a target address is supplied it is validated and used to look up the
+ *   transport; otherwise the socket's own transport (if any) is used
+ * - a listening socket with no target address goes to the server-side
+ *   handler; everything else that is permitted goes to the client-side one
+ * - returns the handler's result, -EOPNOTSUPP for MSG_OOB, -ENOTCONN if the
+ *   socket isn't in a state to send or another negative error code
+ */
+static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
+			 struct msghdr *m, size_t len)
+{
+	struct rxrpc_transport *trans;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	int ret;
+
+	_enter(",{%d},,%zu", rx->sk.sk_state, len);
+
+	if (m->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	if (m->msg_name) {
+		ret = rxrpc_validate_address(rx, m->msg_name, m->msg_namelen);
+		if (ret < 0) {
+			_leave(" = %d [bad addr]", ret);
+			return ret;
+		}
+	}
+
+	trans = NULL;
+	lock_sock(&rx->sk);
+
+	if (m->msg_name) {
+		/* rxrpc_name_to_transport() asserts that the socket has a
+		 * local endpoint and has left the unconnected state, so a
+		 * socket that was never bound or connected has to be turned
+		 * away here rather than being allowed to trip those
+		 * assertions */
+		if (!rx->local || rx->sk.sk_state <= RXRPC_UNCONNECTED) {
+			ret = -ENOTCONN;
+			goto out;
+		}
+
+		trans = rxrpc_name_to_transport(sock, m->msg_name,
+						m->msg_namelen, 0, GFP_KERNEL);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			trans = NULL;
+			goto out;
+		}
+	} else {
+		trans = rx->trans;
+		if (trans)
+			atomic_inc(&trans->usage);
+	}
+
+	switch (rx->sk.sk_state) {
+	case RXRPC_SERVER_LISTENING:
+		if (!m->msg_name) {
+			ret = rxrpc_server_sendmsg(iocb, rx, m, len);
+			break;
+		}
+		/* fall through - an address was given */
+	case RXRPC_SERVER_BOUND:
+	case RXRPC_CLIENT_BOUND:
+		if (!m->msg_name) {
+			ret = -ENOTCONN;
+			break;
+		}
+		/* fall through - an address was given */
+	case RXRPC_CLIENT_CONNECTED:
+		ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len);
+		break;
+	default:
+		ret = -ENOTCONN;
+		break;
+	}
+
+out:
+	release_sock(&rx->sk);
+	if (trans)
+		rxrpc_put_transport(trans);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * set RxRPC socket options
+ * - only options at the SOL_RXRPC level are recognised; any other level or an
+ *   unknown option name gets -EOPNOTSUPP
+ * - every recognised option can only be set whilst the socket is in the
+ *   RXRPC_UNCONNECTED state (-EISCONN otherwise)
+ * - returns 0 on success or a negative error code
+ */
+static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
+			    char __user *optval, int optlen)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	unsigned min_sec_level;
+	int ret;
+
+	_enter(",%d,%d,,%d", level, optname, optlen);
+
+	lock_sock(&rx->sk);
+	ret = -EOPNOTSUPP;
+
+	if (level != SOL_RXRPC)
+		goto error;
+
+	switch (optname) {
+	case RXRPC_EXCLUSIVE_CONNECTION:
+		/* a flag option: no value is taken */
+		ret = -EINVAL;
+		if (optlen != 0)
+			goto error;
+		ret = -EISCONN;
+		if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+			goto error;
+		set_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags);
+		goto success;
+
+	case RXRPC_SECURITY_KEY:
+		/* a key can only be set once */
+		ret = -EINVAL;
+		if (rx->key)
+			goto error;
+		ret = -EISCONN;
+		if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+			goto error;
+		/* the helper's result is the result of the call */
+		ret = rxrpc_request_key(rx, optval, optlen);
+		goto error;
+
+	case RXRPC_SECURITY_KEYRING:
+		ret = -EINVAL;
+		if (rx->key)
+			goto error;
+		ret = -EISCONN;
+		if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+			goto error;
+		/* the helper's result is the result of the call */
+		ret = rxrpc_server_keyring(rx, optval, optlen);
+		goto error;
+
+	case RXRPC_MIN_SECURITY_LEVEL:
+		ret = -EINVAL;
+		if (optlen != sizeof(unsigned))
+			goto error;
+		ret = -EISCONN;
+		if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+			goto error;
+		ret = get_user(min_sec_level,
+			       (unsigned __user *) optval);
+		if (ret < 0)
+			goto error;
+		ret = -EINVAL;
+		if (min_sec_level > RXRPC_SECURITY_MAX)
+			goto error;
+		rx->min_sec_level = min_sec_level;
+		goto success;
+
+	default:
+		/* unknown option: report the -EOPNOTSUPP set above rather
+		 * than dropping into the success path */
+		goto error;
+	}
+
+success:
+	ret = 0;
+error:
+	release_sock(&rx->sk);
+	return ret;
+}
+
+/*
+ * permit an RxRPC socket to be polled
+ * - returns the mask of poll events that are currently true for the socket
+ */
+static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
+			       poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int events = 0;
+
+	poll_wait(file, sk->sk_sleep, wait);
+
+	/* readable: at least one message is waiting on the Rx queue */
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		events |= POLLIN | POLLRDNORM;
+
+	/* writable: the socket has space for new data, though that doesn't
+	 * guarantee that any particular call in progress on the socket has
+	 * space in its Tx ACK window */
+	if (rxrpc_writable(sk))
+		events |= POLLOUT | POLLWRNORM;
+
+	return events;
+}
+
+/*
+ * create an RxRPC socket
+ * - @protocol selects the transport's protocol family; only PF_INET is
+ *   accepted and only for SOCK_DGRAM sockets
+ * - returns 0 on success, -EPROTONOSUPPORT or -ESOCKTNOSUPPORT if the request
+ *   isn't supported and -ENOMEM if the socket couldn't be allocated
+ */
+static int rxrpc_create(struct socket *sock, int protocol)
+{
+	struct rxrpc_sock *rx;
+	struct sock *sk;
+
+	_enter("%p,%d", sock, protocol);
+
+	/* we support transport protocol UDP only */
+	if (protocol != PF_INET)
+		return -EPROTONOSUPPORT;
+	if (sock->type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	sock->state = SS_UNCONNECTED;
+	sock->ops = &rxrpc_rpc_ops;
+
+	sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1);
+	if (sk == NULL)
+		return -ENOMEM;
+
+	/* generic socket state */
+	sock_init_data(sock, sk);
+	sk->sk_destruct = rxrpc_sock_destructor;
+	sk->sk_write_space = rxrpc_write_space;
+	sk->sk_max_ack_backlog = sysctl_rxrpc_max_qlen;
+	sk->sk_state = RXRPC_UNCONNECTED;
+
+	/* RxRPC-specific state */
+	rx = rxrpc_sk(sk);
+	memset(&rx->srx, 0, sizeof(rx->srx));
+	rwlock_init(&rx->call_lock);
+	INIT_LIST_HEAD(&rx->acceptq);
+	INIT_LIST_HEAD(&rx->secureq);
+	INIT_LIST_HEAD(&rx->listen_link);
+	rx->calls = RB_ROOT;
+	rx->proto = protocol;
+
+	_leave(" = 0 [%p]", rx);
+	return 0;
+}
+
+/*
+ * RxRPC socket destructor
+ * - installed as sk_destruct by rxrpc_create()
+ * - discards anything left on the receive queue and complains if the socket
+ *   doesn't look like it has been properly shut down
+ */
+static void rxrpc_sock_destructor(struct sock *sk)
+{
+	_enter("%p", sk);
+
+	rxrpc_purge_queue(&sk->sk_receive_queue);
+
+	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+	BUG_TRAP(sk_unhashed(sk));
+	BUG_TRAP(!sk->sk_socket);
+
+	if (sock_flag(sk, SOCK_DEAD))
+		return;
+
+	printk("Attempt to release alive rxrpc socket: %p\n", sk);
+}
+
+/*
+ * release an RxRPC socket
+ * - marks the socket closed, removes it from its local endpoint's list of
+ * services, releases the calls still on it and then puts whatever
+ * connection, bundle, transport, local endpoint and keys it holds
+ * - always returns 0
+ */
+static int rxrpc_release_sock(struct sock *sk)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+
+ _enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+
+ /* declare the socket closed for business */
+ sock_orphan(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ spin_lock_bh(&sk->sk_receive_queue.lock); /* the state is changed under the receive queue lock */
+ sk->sk_state = RXRPC_CLOSE;
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+ ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
+
+ if (!list_empty(&rx->listen_link)) { /* only linked if rxrpc_bind() registered a service */
+ write_lock_bh(&rx->local->services_lock);
+ list_del(&rx->listen_link);
+ write_unlock_bh(&rx->local->services_lock);
+ }
+
+ /* try to flush out this socket */
+ rxrpc_release_calls_on_socket(rx);
+ flush_workqueue(rxrpc_workqueue);
+ rxrpc_purge_queue(&sk->sk_receive_queue);
+
+ if (rx->conn) {
+ rxrpc_put_connection(rx->conn);
+ rx->conn = NULL;
+ }
+
+ if (rx->bundle) { /* must go before rx->trans, which the put needs */
+ rxrpc_put_bundle(rx->trans, rx->bundle);
+ rx->bundle = NULL;
+ }
+ if (rx->trans) {
+ rxrpc_put_transport(rx->trans);
+ rx->trans = NULL;
+ }
+ if (rx->local) {
+ rxrpc_put_local(rx->local);
+ rx->local = NULL;
+ }
+
+ key_put(rx->key); /* not guarded by a NULL check, unlike the puts above */
+ rx->key = NULL;
+ key_put(rx->securities);
+ rx->securities = NULL;
+ sock_put(sk);
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * release an RxRPC BSD socket on close() or equivalent
+ * - detaches the RxRPC socket from the BSD socket and releases it
+ * - does nothing if no RxRPC socket is attached
+ */
+static int rxrpc_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	_enter("%p{%p}", sock, sk);
+
+	if (sk) {
+		sock->sk = NULL;
+		return rxrpc_release_sock(sk);
+	}
+
+	return 0;
+}
+
+/*
+ * RxRPC network protocol
+ * - the socket operations installed on every socket made by rxrpc_create();
+ *   operations that RxRPC doesn't implement are pointed at the generic
+ *   sock_no_*() handlers
+ */
+static const struct proto_ops rxrpc_rpc_ops = {
+	.family		= PF_RXRPC,	/* same family as rxrpc_family_ops */
+	.owner		= THIS_MODULE,
+	.release	= rxrpc_release,
+	.bind		= rxrpc_bind,
+	.connect	= rxrpc_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= sock_no_getname,
+	.poll		= rxrpc_poll,
+	.ioctl		= sock_no_ioctl,
+	.listen		= rxrpc_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= rxrpc_setsockopt,
+	.getsockopt	= sock_no_getsockopt,
+	.sendmsg	= rxrpc_sendmsg,
+	.recvmsg	= rxrpc_recvmsg,
+	.mmap		= sock_no_mmap,
+	.sendpage	= sock_no_sendpage,
+};
+
+/* protocol descriptor handed to sk_alloc() and proto_register() */
+static struct proto rxrpc_proto = {
+	.owner		= THIS_MODULE,
+	.name		= "RXRPC",
+	.max_header	= sizeof(struct rxrpc_header),
+	.obj_size	= sizeof(struct rxrpc_sock),
+};
+
+/* socket family descriptor registered with sock_register() */
+static struct net_proto_family rxrpc_family_ops = {
+	.owner	= THIS_MODULE,
+	.family	= PF_RXRPC,
+	.create	= rxrpc_create,
+};
+
+/*
+ * initialise and register the RxRPC protocol
+ * - sets up the call slab and the work queue, then registers the protocol,
+ *   the socket family and the client and server key types
+ * - on failure, whatever had been set up is undone in reverse order and the
+ *   error is returned
+ */
+static int __init af_rxrpc_init(void)
+{
+	struct sk_buff *dummy_skb;
+	int ret;
+
+	/* the per-packet private data has to fit in the skb control buffer */
+	BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb));
+
+	rxrpc_epoch = htonl(xtime.tv_sec);
+
+	rxrpc_call_jar = kmem_cache_create("rxrpc_call_jar",
+					   sizeof(struct rxrpc_call), 0,
+					   SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!rxrpc_call_jar) {
+		printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n");
+		ret = -ENOMEM;
+		goto error_call_jar;
+	}
+
+	rxrpc_workqueue = create_workqueue("krxrpcd");
+	if (!rxrpc_workqueue) {
+		printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n");
+		ret = -ENOMEM;
+		goto error_work_queue;
+	}
+
+	ret = proto_register(&rxrpc_proto, 1);
+	if (ret < 0) {
+		printk(KERN_CRIT "RxRPC: Cannot register protocol\n");
+		goto error_proto;
+	}
+
+	ret = sock_register(&rxrpc_family_ops);
+	if (ret < 0) {
+		printk(KERN_CRIT "RxRPC: Cannot register socket family\n");
+		goto error_sock;
+	}
+
+	ret = register_key_type(&key_type_rxrpc);
+	if (ret < 0) {
+		printk(KERN_CRIT "RxRPC: Cannot register client key type\n");
+		goto error_key_type;
+	}
+
+	ret = register_key_type(&key_type_rxrpc_s);
+	if (ret < 0) {
+		printk(KERN_CRIT "RxRPC: Cannot register server key type\n");
+		goto error_key_type_s;
+	}
+
+#ifdef CONFIG_PROC_FS
+	proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops);
+	proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops);
+#endif
+	return 0;
+
+	/* unwind in the reverse of the order of setting up */
+error_key_type_s:
+	unregister_key_type(&key_type_rxrpc);
+error_key_type:
+	sock_unregister(PF_RXRPC);
+error_sock:
+	proto_unregister(&rxrpc_proto);
+error_proto:
+	destroy_workqueue(rxrpc_workqueue);
+error_work_queue:
+	kmem_cache_destroy(rxrpc_call_jar);
+error_call_jar:
+	return ret;
+}
+
+/*
+ * unregister the RxRPC protocol
+ * - the key types, socket family and protocol are unregistered first, then
+ * all calls, connections, transports, peers and local endpoints are
+ * destroyed, and finally the proc files, work queue and call slab are
+ * disposed of
+ */
+static void __exit af_rxrpc_exit(void)
+{
+ _enter("");
+ unregister_key_type(&key_type_rxrpc_s); /* registrations are undone in the reverse of the order in af_rxrpc_init() */
+ unregister_key_type(&key_type_rxrpc);
+ sock_unregister(PF_RXRPC);
+ proto_unregister(&rxrpc_proto);
+ rxrpc_destroy_all_calls();
+ rxrpc_destroy_all_connections();
+ rxrpc_destroy_all_transports();
+ rxrpc_destroy_all_peers();
+ rxrpc_destroy_all_locals();
+
+ ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0); /* no skbs may still be in use by this point */
+
+ _debug("flush scheduled work");
+ flush_workqueue(rxrpc_workqueue);
+ proc_net_remove("rxrpc_conns"); /* created by af_rxrpc_init() under CONFIG_PROC_FS */
+ proc_net_remove("rxrpc_calls");
+ destroy_workqueue(rxrpc_workqueue);
+ kmem_cache_destroy(rxrpc_call_jar);
+ _leave("");
+}
+
+module_init(af_rxrpc_init);
+module_exit(af_rxrpc_exit);
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
new file mode 100644
index 000000000000..92a87fde8bfe
--- /dev/null
+++ b/net/rxrpc/ar-accept.c
@@ -0,0 +1,504 @@
+/* incoming call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * send a BUSY packet in reply to a call that can't be taken on right now
+ * - @hdr is rewritten in place and forms the whole of the packet sent
+ * - @srx supplies the address the packet is sent to
+ * - returns 0 if the packet was sent and -EAGAIN if sending failed
+ */
+static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
+		      struct rxrpc_header *hdr)
+{
+	struct msghdr msg;
+	struct kvec iov[1];
+	int ret;
+
+	_enter("%d,,", local->debug_id);
+
+	/* turn the header we were given into a BUSY packet header */
+	hdr->seq	= 0;
+	hdr->type	= RXRPC_PACKET_TYPE_BUSY;
+	hdr->flags	= 0;
+	hdr->userStatus	= 0;
+	hdr->_rsvd	= 0;
+	hdr->serial	= htonl(1);
+
+	/* the packet consists of the header alone */
+	iov[0].iov_base	= hdr;
+	iov[0].iov_len	= sizeof(*hdr);
+
+	msg.msg_name		= &srx->transport.sin;
+	msg.msg_namelen		= sizeof(srx->transport.sin);
+	msg.msg_control		= NULL;
+	msg.msg_controllen	= 0;
+	msg.msg_flags		= 0;
+
+	_proto("Tx BUSY %%%u", ntohl(hdr->serial));
+
+	ret = kernel_sendmsg(local->socket, &msg, iov, 1, iov[0].iov_len);
+	if (ret < 0) {
+		_leave(" = -EAGAIN [sendmsg failed: %d]", ret);
+		return -EAGAIN;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * accept an incoming call that needs peer, transport and/or connection setting
+ * up
+ * - @skb is the packet that started the call, @srx is the address it came from
+ *   and @rx is the socket providing the service it asked for
+ * - on success the packet is passed to rxrpc_fast_process_packet() and 0 is
+ *   returned
+ * - on failure a negative error code is returned and the packet has not been
+ *   consumed here; -EBUSY is used when the notification, peer or transport
+ *   can't be obtained and -ECONNREFUSED if the socket has been closed
+ */
+static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
+				      struct rxrpc_sock *rx,
+				      struct sk_buff *skb,
+				      struct sockaddr_rxrpc *srx)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_transport *trans;
+	struct rxrpc_skb_priv *sp, *nsp;
+	struct rxrpc_peer *peer;
+	struct rxrpc_call *call;
+	struct sk_buff *notification;
+	int ret;
+
+	_enter("");
+
+	sp = rxrpc_skb(skb);
+
+	/* get a notification message to send to the server app */
+	notification = alloc_skb(0, GFP_NOFS);
+	if (!notification) {
+		/* report this as busy since that is an error the caller knows
+		 * how to handle */
+		_debug("no memory");
+		ret = -EBUSY;
+		goto error_nofree;
+	}
+	rxrpc_new_skb(notification);
+	notification->mark = RXRPC_SKB_MARK_NEW_CALL;
+
+	peer = rxrpc_get_peer(srx, GFP_NOIO);
+	if (IS_ERR(peer)) {
+		_debug("no peer");
+		ret = -EBUSY;
+		goto error;
+	}
+
+	/* failure is reported as an error pointer, not as NULL */
+	trans = rxrpc_get_transport(local, peer, GFP_NOIO);
+	rxrpc_put_peer(peer);
+	if (IS_ERR(trans)) {
+		_debug("no trans");
+		ret = -EBUSY;
+		goto error;
+	}
+
+	conn = rxrpc_incoming_connection(trans, &sp->hdr, GFP_NOIO);
+	rxrpc_put_transport(trans);
+	if (IS_ERR(conn)) {
+		_debug("no conn");
+		ret = PTR_ERR(conn);
+		goto error;
+	}
+
+	call = rxrpc_incoming_call(rx, conn, &sp->hdr, GFP_NOIO);
+	rxrpc_put_connection(conn);
+	if (IS_ERR(call)) {
+		_debug("no call");
+		ret = PTR_ERR(call);
+		goto error;
+	}
+
+	/* attach the call to the socket */
+	read_lock_bh(&local->services_lock);
+	if (rx->sk.sk_state == RXRPC_CLOSE)
+		goto invalid_service;
+
+	write_lock(&rx->call_lock);
+	if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) {
+		rxrpc_get_call(call);
+
+		spin_lock(&call->conn->state_lock);
+		if (sp->hdr.securityIndex > 0 &&
+		    call->conn->state == RXRPC_CONN_SERVER_UNSECURED) {
+			/* the connection must be secured before the call can
+			 * be offered to the server app */
+			_debug("await conn sec");
+			list_add_tail(&call->accept_link, &rx->secureq);
+			call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
+			atomic_inc(&call->conn->usage);
+			set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
+			rxrpc_queue_conn(call->conn);
+		} else {
+			_debug("conn ready");
+			call->state = RXRPC_CALL_SERVER_ACCEPTING;
+			list_add_tail(&call->accept_link, &rx->acceptq);
+			rxrpc_get_call(call);
+			nsp = rxrpc_skb(notification);
+			nsp->call = call;
+
+			ASSERTCMP(atomic_read(&call->usage), >=, 3);
+
+			_debug("notify");
+			spin_lock(&call->lock);
+			ret = rxrpc_queue_rcv_skb(call, notification, true,
+						  false);
+			spin_unlock(&call->lock);
+			/* the notification has been passed on, so it mustn't
+			 * be freed below */
+			notification = NULL;
+			if (ret < 0)
+				BUG();
+		}
+		spin_unlock(&call->conn->state_lock);
+
+		_debug("queued");
+	}
+	write_unlock(&rx->call_lock);
+
+	_debug("process");
+	rxrpc_fast_process_packet(call, skb);
+
+	_debug("done");
+	read_unlock_bh(&local->services_lock);
+	rxrpc_free_skb(notification);
+	rxrpc_put_call(call);
+	_leave(" = 0");
+	return 0;
+
+invalid_service:
+	_debug("invalid");
+	read_unlock_bh(&local->services_lock);
+
+	/* the "already released" state is the RXRPC_CALL_RELEASED flag (as
+	 * tested by rxrpc_accept_call()); RXRPC_CALL_RELEASE is the event used
+	 * to request the release */
+	read_lock_bh(&call->state_lock);
+	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) {
+		rxrpc_get_call(call);
+		rxrpc_queue_call(call);
+	}
+	read_unlock_bh(&call->state_lock);
+	rxrpc_put_call(call);
+	ret = -ECONNREFUSED;
+error:
+	rxrpc_free_skb(notification);
+error_nofree:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * accept incoming calls that need peer, transport and/or connection setting up
+ * - the packets we get are all incoming client DATA packets that have seq == 1
+ * - this is the work function for a local endpoint's acceptor work item; it
+ * drains the endpoint's accept_queue, handing each packet to the socket
+ * that provides the requested service
+ * - packets that can't be accepted are answered with a BUSY packet or are
+ * rejected
+ */
+void rxrpc_accept_incoming_calls(struct work_struct *work)
+{
+ struct rxrpc_local *local =
+ container_of(work, struct rxrpc_local, acceptor);
+ struct rxrpc_skb_priv *sp;
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_sock *rx;
+ struct sk_buff *skb;
+ __be16 service_id;
+ int ret;
+
+ _enter("%d", local->debug_id);
+
+ read_lock_bh(&rxrpc_local_lock); /* only take a ref if the endpoint's usage count hasn't reached zero */
+ if (atomic_read(&local->usage) > 0)
+ rxrpc_get_local(local);
+ else
+ local = NULL;
+ read_unlock_bh(&rxrpc_local_lock);
+ if (!local) {
+ _leave(" [local dead]");
+ return;
+ }
+
+process_next_packet:
+ skb = skb_dequeue(&local->accept_queue);
+ if (!skb) { /* queue drained: drop the ref taken above and finish */
+ rxrpc_put_local(local);
+ _leave("\n");
+ return;
+ }
+
+ _net("incoming call skb %p", skb);
+
+ sp = rxrpc_skb(skb);
+
+ /* determine the remote address */
+ memset(&srx, 0, sizeof(srx));
+ srx.srx_family = AF_RXRPC;
+ srx.transport.family = local->srx.transport.family;
+ srx.transport_type = local->srx.transport_type;
+ switch (srx.transport.family) {
+ case AF_INET:
+ srx.transport_len = sizeof(struct sockaddr_in);
+ srx.transport.sin.sin_port = udp_hdr(skb)->source;
+ srx.transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ break;
+ default:
+ goto busy;
+ }
+
+ /* get the socket providing the service */
+ service_id = sp->hdr.serviceId;
+ read_lock_bh(&local->services_lock);
+ list_for_each_entry(rx, &local->services, listen_link) {
+ if (rx->service_id == service_id &&
+ rx->sk.sk_state != RXRPC_CLOSE)
+ goto found_service; /* services_lock is still held */
+ }
+ read_unlock_bh(&local->services_lock);
+ goto invalid_service;
+
+found_service:
+ _debug("found service %hd", ntohs(rx->service_id));
+ if (sk_acceptq_is_full(&rx->sk))
+ goto backlog_full;
+ sk_acceptq_added(&rx->sk); /* undone below if the call isn't accepted */
+ sock_hold(&rx->sk); /* pin the socket before dropping services_lock */
+ read_unlock_bh(&local->services_lock);
+
+ ret = rxrpc_accept_incoming_call(local, rx, skb, &srx);
+ if (ret < 0)
+ sk_acceptq_removed(&rx->sk);
+ sock_put(&rx->sk);
+ switch (ret) {
+ case -ECONNRESET: /* old calls are ignored */
+ case -ECONNABORTED: /* aborted calls are reaborted or ignored */
+ case 0:
+ goto process_next_packet;
+ case -ECONNREFUSED:
+ goto invalid_service;
+ case -EBUSY:
+ goto busy;
+ case -EKEYREJECTED:
+ goto security_mismatch;
+ default:
+ BUG(); /* any other error code is treated as a bug */
+ }
+
+backlog_full:
+ read_unlock_bh(&local->services_lock);
+busy:
+ rxrpc_busy(local, &srx, &sp->hdr); /* the result of sending the BUSY packet is ignored */
+ rxrpc_free_skb(skb);
+ goto process_next_packet;
+
+invalid_service:
+ skb->priority = RX_INVALID_OPERATION; /* code handed to rxrpc_reject_packet() along with the packet */
+ rxrpc_reject_packet(local, skb);
+ goto process_next_packet;
+
+ /* can't change connection security type mid-flow */
+security_mismatch:
+ skb->priority = RX_PROTOCOL_ERROR;
+ rxrpc_reject_packet(local, skb);
+ goto process_next_packet;
+}
+
+/*
+ * handle acceptance of a call by userspace
+ * - assign the user call ID to the call at the front of the queue
+ * - returns the accepted call with an extra reference taken on it, or an error
+ * pointer: -ENODATA if no call is waiting, -EBADSLT if the user call ID is
+ * already in use on this socket, -ECONNABORTED if the call was aborted,
+ * the connection's error if there was a network error, or -ETIME if the
+ * call is already dead
+ */
+struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
+ unsigned long user_call_ID)
+{
+ struct rxrpc_call *call;
+ struct rb_node *parent, **pp;
+ int ret;
+
+ _enter(",%lx", user_call_ID);
+
+ ASSERT(!irqs_disabled());
+
+ write_lock(&rx->call_lock);
+
+ ret = -ENODATA;
+ if (list_empty(&rx->acceptq))
+ goto out;
+
+ /* check the user ID isn't already in use */
+ ret = -EBADSLT;
+ pp = &rx->calls.rb_node;
+ parent = NULL;
+ while (*pp) { /* the walk also finds where the new node is to be linked in */
+ parent = *pp;
+ call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ pp = &(*pp)->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ pp = &(*pp)->rb_right;
+ else
+ goto out;
+ }
+
+ /* dequeue the first call and check it's still valid */
+ call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+ list_del_init(&call->accept_link);
+ sk_acceptq_removed(&rx->sk);
+
+ write_lock_bh(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_ACCEPTING:
+ call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+ break;
+ case RXRPC_CALL_REMOTELY_ABORTED:
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ ret = -ECONNABORTED;
+ goto out_release;
+ case RXRPC_CALL_NETWORK_ERROR:
+ ret = call->conn->error;
+ goto out_release;
+ case RXRPC_CALL_DEAD:
+ ret = -ETIME;
+ goto out_discard;
+ default:
+ BUG(); /* no other state is expected for a call on the accept queue */
+ }
+
+ /* formalise the acceptance */
+ call->user_call_ID = user_call_ID;
+ rb_link_node(&call->sock_node, parent, pp);
+ rb_insert_color(&call->sock_node, &rx->calls);
+ if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
+ BUG();
+ if (test_and_set_bit(RXRPC_CALL_ACCEPTED, &call->events))
+ BUG();
+ rxrpc_queue_call(call); /* queued after the ACCEPTED event bit has been set */
+
+ rxrpc_get_call(call); /* this ref goes with the pointer being returned */
+ write_unlock_bh(&call->state_lock);
+ write_unlock(&rx->call_lock);
+ _leave(" = %p{%d}", call, call->debug_id);
+ return call;
+
+ /* if the call is already dying or dead, then we leave the socket's ref
+ * on it to be released by rxrpc_dead_call_expired() as induced by
+ * rxrpc_release_call() */
+out_release:
+ _debug("release %p", call);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+ rxrpc_queue_call(call);
+out_discard:
+ write_unlock_bh(&call->state_lock);
+ _debug("discard %p", call);
+out:
+ write_unlock(&rx->call_lock);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * handle rejectance of a call by userspace
+ * - reject the call at the front of the queue
+ * - the call is taken off the accept queue under rx->call_lock (write) and
+ *   examined under its state_lock (write, BH-disabled)
+ * - a call still waiting to be accepted is switched to
+ *   RXRPC_CALL_SERVER_BUSY and has RXRPC_CALL_REJECT_BUSY raised so that the
+ *   call processor generates a BUSY packet for it
+ * - unless the call is already dead, a release of the call is then requested
+ * - returns:
+ *     0              the call was marked busy
+ *     -ENODATA       the accept queue is empty
+ *     -ECONNABORTED  the dequeued call was aborted (locally or remotely)
+ *     conn->error    the dequeued call suffered a network error
+ *     -ETIME         the dequeued call is already dead
+ */
+int rxrpc_reject_call(struct rxrpc_sock *rx)
+{
+ struct rxrpc_call *call;
+ int ret;
+
+ _enter("");
+
+ ASSERT(!irqs_disabled());
+
+ write_lock(&rx->call_lock);
+
+ ret = -ENODATA;
+ if (list_empty(&rx->acceptq))
+ goto out;
+
+ /* dequeue the first call and check it's still valid */
+ call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+ list_del_init(&call->accept_link);
+ sk_acceptq_removed(&rx->sk);
+
+ write_lock_bh(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_ACCEPTING:
+ call->state = RXRPC_CALL_SERVER_BUSY;
+ /* kick the processor when we are the one raising the event, as is
+  * done for every other event in this file; if the bit was already
+  * set then whoever set it is responsible for the queueing */
+ if (!test_and_set_bit(RXRPC_CALL_REJECT_BUSY, &call->events))
+ rxrpc_queue_call(call);
+ ret = 0;
+ goto out_release;
+ case RXRPC_CALL_REMOTELY_ABORTED:
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ ret = -ECONNABORTED;
+ goto out_release;
+ case RXRPC_CALL_NETWORK_ERROR:
+ ret = call->conn->error;
+ goto out_release;
+ case RXRPC_CALL_DEAD:
+ ret = -ETIME;
+ goto out_discard;
+ default:
+ BUG();
+ }
+
+ /* if the call is already dying or dead, then we leave the socket's ref
+ * on it to be released by rxrpc_dead_call_expired() as induced by
+ * rxrpc_release_call() */
+out_release:
+ _debug("release %p", call);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+ rxrpc_queue_call(call);
+out_discard:
+ write_unlock_bh(&call->state_lock);
+ _debug("discard %p", call);
+out:
+ write_unlock(&rx->call_lock);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/**
+ * rxrpc_kernel_accept_call - Accept the next incoming call for a kernel service
+ * @sock: The socket on which the impending call is waiting
+ * @user_call_ID: The tag to attach to the call
+ *
+ * Thin wrapper that hands the rxrpc socket behind @sock to rxrpc_accept_call()
+ * and passes the result (a call pointer or an ERR_PTR() value) straight back
+ * to the kernel service.
+ */
+struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
+ unsigned long user_call_ID)
+{
+ struct rxrpc_sock *rx;
+ struct rxrpc_call *accepted;
+
+ _enter(",%lx", user_call_ID);
+ rx = rxrpc_sk(sock->sk);
+ accepted = rxrpc_accept_call(rx, user_call_ID);
+ _leave(" = %p", accepted);
+ return accepted;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_accept_call);
+
+/**
+ * rxrpc_kernel_reject_call - Reject the next incoming call for a kernel service
+ * @sock: The socket on which the impending call is waiting
+ *
+ * Thin wrapper that hands the rxrpc socket behind @sock to rxrpc_reject_call(),
+ * which marks the call at the front of the accept queue as busy if it is still
+ * valid.  The result of rxrpc_reject_call() is returned unchanged.
+ */
+int rxrpc_kernel_reject_call(struct socket *sock)
+{
+ struct rxrpc_sock *rx;
+ int err;
+
+ _enter("");
+ rx = rxrpc_sk(sock->sk);
+ err = rxrpc_reject_call(rx);
+ _leave(" = %d", err);
+ return err;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_reject_call);
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
new file mode 100644
index 000000000000..657ee69f2133
--- /dev/null
+++ b/net/rxrpc/ar-ack.c
@@ -0,0 +1,1306 @@
+/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static unsigned rxrpc_ack_defer = 1; /* jiffies by which a REQUESTED ACK is deferred; 0 means send at once */
+
+static const char *rxrpc_acks[] = { /* names for ACK reasons used in trace output, indexed by reason code */
+ "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
+ "-?-" /* presumably the catch-all for unrecognised reasons - TODO confirm */
+};
+
+static const s8 rxrpc_ack_priority[] = { /* rank of each ACK reason: a proposed ACK only displaces the pending one if it ranks higher */
+ [0] = 0, /* no ACK reason pending */
+ [RXRPC_ACK_DELAY] = 1,
+ [RXRPC_ACK_REQUESTED] = 2,
+ [RXRPC_ACK_IDLE] = 3,
+ [RXRPC_ACK_PING_RESPONSE] = 4,
+ [RXRPC_ACK_DUPLICATE] = 5,
+ [RXRPC_ACK_OUT_OF_SEQUENCE] = 6,
+ [RXRPC_ACK_EXCEEDS_WINDOW] = 7,
+ [RXRPC_ACK_NOSPACE] = 8,
+};
+
+/*
+ * propose an ACK be sent
+ * - rxrpc_propose_ACK() calls this with call->lock held
+ * - the proposal is ranked against the ACK currently pending on the call
+ *   (call->ackr_reason) using rxrpc_ack_priority[]:
+ *     lower:  the proposal is dropped
+ *     equal:  only the serial number is refreshed, and only for ranks <= 4
+ *     higher: the proposal replaces the pending reason and serial number
+ * - in the lower and equal cases an immediate proposal still cancels the ACK
+ *   timer and raises RXRPC_CALL_ACK
+ * - a replacing proposal then either starts/adjusts the ACK timer or takes
+ *   the immediate path, depending on the reason
+ * - @ack_reason must have a non-zero priority
+ */
+void __rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+ __be32 serial, bool immediate)
+{
+ unsigned long expiry;
+ s8 prior = rxrpc_ack_priority[ack_reason];
+
+ ASSERTCMP(prior, >, 0);
+
+ _enter("{%d},%s,%%%x,%u",
+ call->debug_id, rxrpc_acks[ack_reason], ntohl(serial),
+ immediate);
+
+ if (prior < rxrpc_ack_priority[call->ackr_reason]) {
+ if (immediate)
+ goto cancel_timer;
+ return;
+ }
+
+ /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
+ * numbers */
+ if (prior == rxrpc_ack_priority[call->ackr_reason]) {
+ if (prior <= 4)
+ call->ackr_serial = serial;
+ if (immediate)
+ goto cancel_timer;
+ return;
+ }
+
+ call->ackr_reason = ack_reason;
+ call->ackr_serial = serial;
+
+ switch (ack_reason) {
+ case RXRPC_ACK_DELAY:
+ _debug("run delay timer");
+ call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ;
+ add_timer(&call->ack_timer);
+ return;
+
+ case RXRPC_ACK_IDLE:
+ if (!immediate) {
+ _debug("run defer timer");
+ expiry = 1;
+ goto run_timer;
+ }
+ goto cancel_timer;
+
+ case RXRPC_ACK_REQUESTED:
+ if (!rxrpc_ack_defer)
+ goto cancel_timer;
+ if (!immediate || serial == cpu_to_be32(1)) {
+ _debug("run defer timer");
+ expiry = rxrpc_ack_defer;
+ goto run_timer;
+ }
+ /* fall through - an immediate REQUESTED ACK is sent like any other */
+ default:
+ _debug("immediate ACK");
+ goto cancel_timer;
+ }
+
+run_timer: /* expiry is relative here; only ever bring the timer forward */
+ expiry += jiffies;
+ if (!timer_pending(&call->ack_timer) ||
+ time_after(call->ack_timer.expires, expiry))
+ mod_timer(&call->ack_timer, expiry);
+ return;
+
+cancel_timer: /* immediate path: drop the timer and get the processor to send the ACK */
+ _debug("cancel timer %%%u", ntohl(serial));
+ try_to_del_timer_sync(&call->ack_timer);
+ read_lock_bh(&call->state_lock);
+ if (call->state <= RXRPC_CALL_COMPLETE &&
+ !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * Locked front end to __rxrpc_propose_ACK().  The proposal is only passed on,
+ * under call->lock, if it outranks the ACK currently pending on the call; the
+ * ranking check itself is made before the lock is taken.
+ */
+void rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+ __be32 serial, bool immediate)
+{
+ /* nothing to do unless this reason strictly outranks the pending one */
+ if (rxrpc_ack_priority[ack_reason] <=
+ rxrpc_ack_priority[call->ackr_reason])
+ return;
+
+ spin_lock_bh(&call->lock);
+ __rxrpc_propose_ACK(call, ack_reason, serial, immediate);
+ spin_unlock_bh(&call->lock);
+}
+
+/*
+ * set the resend timer
+ * - @resend is a bitmask: bit 0 asks for the RXRPC_CALL_RESEND event to be
+ *   raised, bit 1 asks for the resend timer to be (re)armed for @resend_at
+ * - if bit 1 is clear, the resend timer is deleted and both its event and
+ *   RXRPC_CALL_RUN_RTIMER are cleared
+ * - both requests are ignored if the call state is RXRPC_CALL_COMPLETE or
+ *   beyond, which therefore kills the timer
+ * - the call is not queued for processing from here
+ */
+static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
+ unsigned long resend_at)
+{
+ read_lock_bh(&call->state_lock);
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ resend = 0;
+
+ if (resend & 1) {
+ _debug("SET RESEND");
+ set_bit(RXRPC_CALL_RESEND, &call->events);
+ }
+
+ if (resend & 2) {
+ _debug("MODIFY RESEND TIMER");
+ set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ mod_timer(&call->resend_timer, resend_at);
+ } else {
+ _debug("KILL RESEND TIMER");
+ del_timer_sync(&call->resend_timer);
+ clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ }
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * resend packets
+ * - walks the Tx window from acks_tail to acks_head, skipping slots whose
+ *   bottom bit is set (the mark rxrpc_process_soft_ACKs() puts on soft-ACK'd
+ *   packets)
+ * - a packet flagged need_resend is given a fresh serial number, which is
+ *   also written into the header at txb->head, and is retransmitted; its next
+ *   resend time is 3 jiffies away if the send failed, else
+ *   rxrpc_resend_timeout seconds away
+ * - every unskipped packet is then checked: if its resend time is at most a
+ *   jiffy away it is flagged need_resend and a further resend pass is
+ *   requested, otherwise it contributes to the earliest resend time
+ * - the outcome is handed to rxrpc_set_resend()
+ * - NOTE(review): 'stop' is only ever assigned 0, so the "|| stop" term in the
+ *   loop condition has no effect; it looks as though the walk was meant to be
+ *   cut short after a failed send - confirm the intent before changing it
+ */
+static void rxrpc_resend(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_header *hdr;
+ struct sk_buff *txb;
+ unsigned long *p_txb, resend_at;
+ int loop, stop;
+ u8 resend;
+
+ _enter("{%d,%d,%d,%d},",
+ call->acks_hard, call->acks_unacked,
+ atomic_read(&call->sequence),
+ CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
+
+ stop = 0;
+ resend = 0;
+ resend_at = 0;
+
+ for (loop = call->acks_tail;
+ loop != call->acks_head || stop;
+ loop = (loop + 1) & (call->acks_winsz - 1)
+ ) {
+ p_txb = call->acks_window + loop;
+ smp_read_barrier_depends();
+ if (*p_txb & 1)
+ continue;
+
+ txb = (struct sk_buff *) *p_txb;
+ sp = rxrpc_skb(txb);
+
+ if (sp->need_resend) {
+ sp->need_resend = 0;
+
+ /* each Tx packet has a new serial number */
+ sp->hdr.serial =
+ htonl(atomic_inc_return(&call->conn->serial));
+
+ hdr = (struct rxrpc_header *) txb->head;
+ hdr->serial = sp->hdr.serial;
+
+ _proto("Tx DATA %%%u { #%d }",
+ ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+ if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
+ stop = 0;
+ sp->resend_at = jiffies + 3;
+ } else {
+ sp->resend_at =
+ jiffies + rxrpc_resend_timeout * HZ;
+ }
+ }
+
+ if (time_after_eq(jiffies + 1, sp->resend_at)) {
+ sp->need_resend = 1;
+ resend |= 1;
+ } else if (resend & 2) {
+ if (time_before(sp->resend_at, resend_at))
+ resend_at = sp->resend_at;
+ } else {
+ resend_at = sp->resend_at;
+ resend |= 2;
+ }
+ }
+
+ rxrpc_set_resend(call, resend, resend_at);
+ _leave("");
+}
+
+/*
+ * handle resend timer expiry
+ * - scans the Tx window from acks_unacked up to acks_head; every slot in that
+ *   range is asserted to have its bottom bit clear
+ * - a packet whose resend time is at most a jiffy away is flagged need_resend
+ *   and a resend pass is requested; the others contribute to the earliest
+ *   resend time
+ * - packets that are already flagged need_resend are left alone and do not,
+ *   by themselves, cause a resend pass to be requested here
+ * - the outcome is handed to rxrpc_set_resend()
+ */
+static void rxrpc_resend_timer(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *txb;
+ unsigned long *p_txb, resend_at;
+ int loop;
+ u8 resend;
+
+ _enter("%d,%d,%d",
+ call->acks_tail, call->acks_unacked, call->acks_head);
+
+ resend = 0;
+ resend_at = 0;
+
+ for (loop = call->acks_unacked;
+ loop != call->acks_head;
+ loop = (loop + 1) & (call->acks_winsz - 1)
+ ) {
+ p_txb = call->acks_window + loop;
+ smp_read_barrier_depends();
+ txb = (struct sk_buff *) (*p_txb & ~1);
+ sp = rxrpc_skb(txb);
+
+ ASSERT(!(*p_txb & 1));
+
+ if (sp->need_resend) {
+ ;
+ } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
+ sp->need_resend = 1;
+ resend |= 1;
+ } else if (resend & 2) {
+ if (time_before(sp->resend_at, resend_at))
+ resend_at = sp->resend_at;
+ } else {
+ resend_at = sp->resend_at;
+ resend |= 2;
+ }
+ }
+
+ rxrpc_set_resend(call, resend, resend_at);
+ _leave("");
+}
+
+/*
+ * process soft ACKs of our transmitted packets
+ * - these indicate packets the peer has or has not received, but hasn't yet
+ * given to the consumer, and so can still be discarded and re-requested
+ * - the ack->nAcks ACK/NACK bytes are copied from the start of @skb and
+ *   applied to the Tx window slots starting at acks_tail: the bottom bit of a
+ *   slot is set when its packet is ACK'd and cleared (with need_resend set)
+ *   when it is NACK'd
+ * - acks_unacked is then moved to the first slot not covered by the ACK and
+ *   the rest of the window up to acks_head is checked for packets that need
+ *   resending or that determine the next resend time
+ * - the outcome is handed to rxrpc_set_resend()
+ * - returns 0 on success, or -EPROTO if the ACK list can't be extracted or
+ *   contains an unsupported ACK type
+ */
+static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
+ struct rxrpc_ackpacket *ack,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *txb;
+ unsigned long *p_txb, resend_at;
+ int loop;
+ u8 sacks[RXRPC_MAXACKS], resend;
+
+ _enter("{%d,%d},{%d},",
+ call->acks_hard,
+ CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz),
+ ack->nAcks);
+
+ if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0)
+ goto protocol_error;
+
+ resend = 0;
+ resend_at = 0;
+ for (loop = 0; loop < ack->nAcks; loop++) {
+ p_txb = call->acks_window;
+ p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1);
+ smp_read_barrier_depends();
+ txb = (struct sk_buff *) (*p_txb & ~1);
+ sp = rxrpc_skb(txb);
+
+ switch (sacks[loop]) {
+ case RXRPC_ACK_TYPE_ACK:
+ sp->need_resend = 0;
+ *p_txb |= 1;
+ break;
+ case RXRPC_ACK_TYPE_NACK:
+ sp->need_resend = 1;
+ *p_txb &= ~1;
+ resend = 1;
+ break;
+ default:
+ _debug("Unsupported ACK type %d", sacks[loop]);
+ goto protocol_error;
+ }
+ }
+
+ smp_mb();
+ call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1);
+
+ /* anything not explicitly ACK'd is implicitly NACK'd, but may just not
+ * have been received or processed yet by the far end */
+ for (loop = call->acks_unacked;
+ loop != call->acks_head;
+ loop = (loop + 1) & (call->acks_winsz - 1)
+ ) {
+ p_txb = call->acks_window + loop;
+ smp_read_barrier_depends();
+ txb = (struct sk_buff *) (*p_txb & ~1);
+ sp = rxrpc_skb(txb);
+
+ if (*p_txb & 1) {
+ /* packet must have been discarded */
+ sp->need_resend = 1;
+ *p_txb &= ~1;
+ resend |= 1;
+ } else if (sp->need_resend) {
+ ;
+ } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
+ sp->need_resend = 1;
+ resend |= 1;
+ } else if (resend & 2) {
+ if (time_before(sp->resend_at, resend_at))
+ resend_at = sp->resend_at;
+ } else {
+ resend_at = sp->resend_at;
+ resend |= 2;
+ }
+ }
+
+ rxrpc_set_resend(call, resend, resend_at);
+ _leave(" = 0");
+ return 0;
+
+protocol_error:
+ _leave(" = -EPROTO");
+ return -EPROTO;
+}
+
+/*
+ * discard hard-ACK'd packets from the Tx window
+ * - frees packets from the tail of the window, advancing acks_tail and
+ *   acks_hard as it goes, until acks_hard reaches @hard
+ * - acks_unacked is dragged along whenever it points at a discarded slot
+ * - @hard must not be further ahead of acks_hard than there are packets in
+ *   the window
+ * - anything sleeping on call->tx_waitq is woken afterwards
+ */
+static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
+{
+ struct rxrpc_skb_priv *sp;
+ unsigned long _skb;
+ int tail = call->acks_tail, old_tail;
+ int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
+
+ _enter("{%u,%u},%u", call->acks_hard, win, hard);
+
+ ASSERTCMP(hard - call->acks_hard, <=, win);
+
+ while (call->acks_hard < hard) {
+ smp_read_barrier_depends();
+ _skb = call->acks_window[tail] & ~1; /* mask off the soft-ACK bit to get the skb pointer */
+ sp = rxrpc_skb((struct sk_buff *) _skb); /* not otherwise used in this function */
+ rxrpc_free_skb((struct sk_buff *) _skb);
+ old_tail = tail;
+ tail = (tail + 1) & (call->acks_winsz - 1);
+ call->acks_tail = tail;
+ if (call->acks_unacked == old_tail)
+ call->acks_unacked = tail;
+ call->acks_hard++;
+ }
+
+ wake_up(&call->tx_waitq);
+}
+
+/*
+ * Empty the Tx window after a failure by treating everything up to the call's
+ * current sequence counter as having been hard-ACK'd.
+ */
+static void rxrpc_clear_tx_window(struct rxrpc_call *call)
+{
+ u32 top = atomic_read(&call->sequence);
+
+ rxrpc_rotate_tx_window(call, top);
+}
+
+/*
+ * drain the out of sequence received packet queue into the packet Rx queue
+ * - handles at most one packet per invocation, under call->lock
+ * - the packet at the head of rx_oos_queue is passed to rxrpc_queue_rcv_skb()
+ *   if its sequence number matches rx_first_oos, in which case rx_data_post is
+ *   advanced and rx_first_oos is set from the new head of the queue (or to 0
+ *   if the queue is now empty)
+ * - otherwise the packet is put back at the head and rx_first_oos is corrected
+ *   to its sequence number
+ * - returns 0, or -ECONNRESET if the call has already been released
+ */
+static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ bool terminal;
+ int ret;
+
+ _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos);
+
+ spin_lock_bh(&call->lock);
+
+ ret = -ECONNRESET;
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
+ goto socket_unavailable;
+
+ skb = skb_dequeue(&call->rx_oos_queue);
+ if (skb) {
+ sp = rxrpc_skb(skb);
+
+ _debug("drain OOS packet %d [%d]",
+ ntohl(sp->hdr.seq), call->rx_first_oos);
+
+ if (ntohl(sp->hdr.seq) != call->rx_first_oos) {
+ skb_queue_head(&call->rx_oos_queue, skb);
+ call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq);
+ _debug("requeue %p {%u}", skb, call->rx_first_oos);
+ } else {
+ skb->mark = RXRPC_SKB_MARK_DATA;
+ terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
+ !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+ ret = rxrpc_queue_rcv_skb(call, skb, true, terminal);
+ BUG_ON(ret < 0);
+ _debug("drain #%u", call->rx_data_post);
+ call->rx_data_post++;
+
+ /* find out what the next packet is */
+ skb = skb_peek(&call->rx_oos_queue);
+ if (skb)
+ call->rx_first_oos =
+ ntohl(rxrpc_skb(skb)->hdr.seq);
+ else
+ call->rx_first_oos = 0;
+ _debug("peek %p {%u}", skb, call->rx_first_oos);
+ }
+ }
+
+ ret = 0;
+socket_unavailable:
+ spin_unlock_bh(&call->lock);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * insert an out of sequence packet into the buffer
+ * - the packet is attached to the call, which gains a ref, and is given
+ *   rxrpc_packet_destructor as its destructor; it must not already be
+ *   attached to a call
+ * - rx_oos_queue is kept in ascending sequence number order, the packet going
+ *   in front of the first queued packet with a greater sequence number
+ * - rx_first_oos is lowered to the new packet's sequence number if need be
+ * - RXRPC_CALL_DRAIN_RX_OOS is raised if rx_first_oos now equals rx_data_post
+ *   and the call state is below RXRPC_CALL_COMPLETE; the call is not queued
+ *   for processing from here
+ */
+static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp, *psp;
+ struct sk_buff *p;
+ u32 seq;
+
+ sp = rxrpc_skb(skb);
+ seq = ntohl(sp->hdr.seq);
+ _enter(",,{%u}", seq);
+
+ skb->destructor = rxrpc_packet_destructor;
+ ASSERTCMP(sp->call, ==, NULL);
+ sp->call = call;
+ rxrpc_get_call(call);
+
+ /* insert into the buffer in sequence order */
+ spin_lock_bh(&call->lock);
+
+ skb_queue_walk(&call->rx_oos_queue, p) {
+ psp = rxrpc_skb(p);
+ if (ntohl(psp->hdr.seq) > seq) {
+ _debug("insert oos #%u before #%u",
+ seq, ntohl(psp->hdr.seq));
+ skb_insert(p, skb, &call->rx_oos_queue);
+ goto inserted;
+ }
+ }
+
+ _debug("append oos #%u", seq);
+ skb_queue_tail(&call->rx_oos_queue, skb);
+inserted:
+
+ /* we might now have a new front to the queue */
+ if (call->rx_first_oos == 0 || seq < call->rx_first_oos)
+ call->rx_first_oos = seq;
+
+ read_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ call->rx_data_post == call->rx_first_oos) {
+ _debug("drain rx oos now");
+ set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
+ }
+ read_unlock(&call->state_lock);
+
+ spin_unlock_bh(&call->lock);
+ _leave(" [stored #%u]", call->rx_first_oos);
+}
+
+/*
+ * clear the Tx window on final ACK reception
+ * - the window array is detached from the call first (call->acks_window is
+ *   set to NULL) and the window size is sampled once up front
+ * - every packet between acks_tail and acks_head is then freed, acks_tail
+ *   being advanced before each packet is released
+ * - finally the window array itself is freed
+ */
+static void rxrpc_zap_tx_window(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ unsigned long _skb, *acks_window;
+ uint8_t winsz = call->acks_winsz;
+ int tail;
+
+ acks_window = call->acks_window;
+ call->acks_window = NULL;
+
+ while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) {
+ tail = call->acks_tail;
+ smp_read_barrier_depends();
+ _skb = acks_window[tail] & ~1; /* mask off the soft-ACK bit to get the skb pointer */
+ smp_mb();
+ call->acks_tail = (call->acks_tail + 1) & (winsz - 1);
+
+ skb = (struct sk_buff *) _skb;
+ sp = rxrpc_skb(skb);
+ _debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
+ rxrpc_free_skb(skb);
+ }
+
+ kfree(acks_window);
+}
+
+/*
+ * Parse the optional rxrpc_ackinfo trailer of an ACK packet.  The trailer is
+ * looked for nAcks + 3 bytes into what is left of the skb; if it can't be
+ * copied out, nothing happens.  The smaller of the advertised rxMTU and maxMTU
+ * is used to shrink (never grow) the peer's maxdata, with peer->mtu being
+ * recomputed from it under peer->lock.
+ */
+static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned latest, int nAcks)
+{
+ struct rxrpc_ackinfo info;
+ struct rxrpc_peer *peer;
+ unsigned limit;
+
+ if (skb_copy_bits(skb, nAcks + 3, &info, sizeof(info)) < 0) {
+ _leave(" [no ackinfo]");
+ return;
+ }
+
+ _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
+ latest,
+ ntohl(info.rxMTU), ntohl(info.maxMTU),
+ ntohl(info.rwind), ntohl(info.jumbo_max));
+
+ limit = min(ntohl(info.rxMTU), ntohl(info.maxMTU));
+ peer = call->conn->trans->peer;
+
+ /* the peer's limits only ever get tightened */
+ if (!(limit < peer->maxdata))
+ return;
+
+ spin_lock_bh(&peer->lock);
+ peer->maxdata = limit;
+ peer->mtu = limit + peer->hdrsize;
+ spin_unlock_bh(&peer->lock);
+ _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
+}
+
+/*
+ * process packets in the reception queue
+ * - skbs are dequeued from call->rx_queue until it is empty or an error is
+ *   hit
+ * - DATA packets are verified and filed in the out-of-sequence queue
+ * - ACK packets have their ackinfo trailer examined, may provoke a ping
+ *   response, and drive hard-ACK rotation and soft-ACK processing of the Tx
+ *   window; stale ACKs and ACKs arriving in a non-transmitting state are
+ *   discarded
+ * - ACKALL, or a hard ACK beyond everything transmitted whilst awaiting the
+ *   reply or the final ACK, ends the transmission phase: the resend timer is
+ *   killed, the Tx window is zapped and, on the server side, the ACK skb is
+ *   posted to the socket as the final ACK message
+ * - @_abort_code is passed to rxrpc_verify_packet(), which presumably fills
+ *   it in on failure
+ * - returns -EAGAIN once the queue has been emptied, or -EPROTO on a packet
+ *   that is malformed or that fails verification
+ */
+static int rxrpc_process_rx_queue(struct rxrpc_call *call,
+ u32 *_abort_code)
+{
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ bool post_ACK;
+ int latest;
+ u32 hard, tx;
+
+ _enter("");
+
+process_further:
+ skb = skb_dequeue(&call->rx_queue);
+ if (!skb)
+ return -EAGAIN;
+
+ _net("deferred skb %p", skb);
+
+ sp = rxrpc_skb(skb);
+
+ _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state);
+
+ post_ACK = false;
+
+ switch (sp->hdr.type) {
+ /* data packets that wind up here have been received out of
+ * order, need security processing or are jumbo packets */
+ case RXRPC_PACKET_TYPE_DATA:
+ _proto("OOSQ DATA %%%u { #%u }",
+ ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+
+ /* secured packets must be verified and possibly decrypted */
+ if (rxrpc_verify_packet(call, skb, _abort_code) < 0)
+ goto protocol_error;
+
+ rxrpc_insert_oos_packet(call, skb);
+ goto process_further;
+
+ /* partial ACK to process */
+ case RXRPC_PACKET_TYPE_ACK:
+ if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) {
+ _debug("extraction failure");
+ goto protocol_error;
+ }
+ if (!skb_pull(skb, sizeof(ack)))
+ BUG();
+
+ latest = ntohl(sp->hdr.serial);
+ hard = ntohl(ack.firstPacket);
+ tx = atomic_read(&call->sequence);
+
+ /* the reason code comes straight off the wire, so it must not be
+  * used to index the name table unchecked; anything out of range is
+  * shown using the table's final entry */
+ _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+ latest,
+ ntohs(ack.maxSkew),
+ hard,
+ ntohl(ack.previousPacket),
+ ntohl(ack.serial),
+ rxrpc_acks[ack.reason < ARRAY_SIZE(rxrpc_acks) ?
+ ack.reason : ARRAY_SIZE(rxrpc_acks) - 1],
+ ack.nAcks);
+
+ rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks);
+
+ if (ack.reason == RXRPC_ACK_PING) {
+ _proto("Rx ACK %%%u PING Request", latest);
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
+ sp->hdr.serial, true);
+ }
+
+ /* discard any out-of-order or duplicate ACKs */
+ if (latest - call->acks_latest <= 0) {
+ _debug("discard ACK %d <= %d",
+ latest, call->acks_latest);
+ goto discard;
+ }
+ call->acks_latest = latest;
+
+ /* ACKs only matter whilst we have transmitted data outstanding */
+ if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY &&
+ call->state != RXRPC_CALL_SERVER_AWAIT_ACK)
+ goto discard;
+
+ _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state);
+
+ if (hard > 0) {
+ if (hard - 1 > tx) {
+ _debug("hard-ACK'd packet %d not transmitted"
+ " (%d top)",
+ hard - 1, tx);
+ goto protocol_error;
+ }
+
+ if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
+ call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
+ hard > tx)
+ goto all_acked;
+
+ smp_rmb();
+ rxrpc_rotate_tx_window(call, hard - 1);
+ }
+
+ if (ack.nAcks > 0) {
+ if (hard - 1 + ack.nAcks > tx) {
+ _debug("soft-ACK'd packet %d+%d not"
+ " transmitted (%d top)",
+ hard - 1, ack.nAcks, tx);
+ goto protocol_error;
+ }
+
+ if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0)
+ goto protocol_error;
+ }
+ goto discard;
+
+ /* complete ACK to process */
+ case RXRPC_PACKET_TYPE_ACKALL:
+ goto all_acked;
+
+ /* abort and busy are handled elsewhere */
+ case RXRPC_PACKET_TYPE_BUSY:
+ case RXRPC_PACKET_TYPE_ABORT:
+ BUG();
+
+ /* connection level events - also handled elsewhere */
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ case RXRPC_PACKET_TYPE_DEBUG:
+ BUG();
+ }
+
+ /* if we've had a hard ACK that covers all the packets we've sent, then
+ * that ends that phase of the operation */
+all_acked:
+ write_lock_bh(&call->state_lock);
+ _debug("ack all %d", call->state);
+
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+ break;
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ _debug("srv complete");
+ call->state = RXRPC_CALL_COMPLETE;
+ post_ACK = true;
+ break;
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ goto protocol_error_unlock; /* can't occur yet */
+ default:
+ write_unlock_bh(&call->state_lock);
+ goto discard; /* assume packet left over from earlier phase */
+ }
+
+ write_unlock_bh(&call->state_lock);
+
+ /* if all the packets we sent are hard-ACK'd, then we can discard
+ * whatever we've got left */
+ _debug("clear Tx %d",
+ CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
+
+ del_timer_sync(&call->resend_timer);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+
+ if (call->acks_window)
+ rxrpc_zap_tx_window(call);
+
+ if (post_ACK) {
+ /* post the final ACK message for userspace to pick up */
+ _debug("post ACK");
+ skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
+ sp->call = call;
+ rxrpc_get_call(call);
+ spin_lock_bh(&call->lock);
+ if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
+ BUG();
+ spin_unlock_bh(&call->lock);
+ goto process_further;
+ }
+
+discard:
+ rxrpc_free_skb(skb);
+ goto process_further;
+
+protocol_error_unlock:
+ write_unlock_bh(&call->state_lock);
+protocol_error:
+ rxrpc_free_skb(skb);
+ _leave(" = -EPROTO");
+ return -EPROTO;
+}
+
+/*
+ * post a message to the socket Rx queue for recvmsg() to pick up
+ * - @mark is stored in the new skb's mark field and @error in its rxrpc
+ *   private data; the skb carries no payload and holds a ref on the call
+ * - if @fatal, the resend and ACK timers are deleted first, and @fatal is also
+ *   passed as the last argument to rxrpc_queue_rcv_skb()
+ * - nothing is posted, and 0 is returned, if the call has no user ID yet
+ *   (unless this is a new-call notification) or if RXRPC_CALL_TERMINAL_MSG is
+ *   already set on the call
+ * - returns 0, or -ENOMEM if the skb can't be allocated; a failure to queue
+ *   the skb is treated as a bug
+ */
+static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
+ bool fatal)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ int ret;
+
+ _enter("{%d,%lx},%u,%u,%d",
+ call->debug_id, call->flags, mark, error, fatal);
+
+ /* remove timers and things for fatal messages */
+ if (fatal) {
+ del_timer_sync(&call->resend_timer);
+ del_timer_sync(&call->ack_timer);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ }
+
+ if (mark != RXRPC_SKB_MARK_NEW_CALL &&
+ !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ _leave("[no userid]");
+ return 0;
+ }
+
+ if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
+ skb = alloc_skb(0, GFP_NOFS);
+ if (!skb)
+ return -ENOMEM;
+
+ rxrpc_new_skb(skb);
+
+ skb->mark = mark;
+
+ sp = rxrpc_skb(skb);
+ memset(sp, 0, sizeof(*sp));
+ sp->error = error;
+ sp->call = call;
+ rxrpc_get_call(call);
+
+ spin_lock_bh(&call->lock);
+ ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
+ spin_unlock_bh(&call->lock);
+ if (ret < 0)
+ BUG();
+ }
+
+ return 0;
+}
+
+/*
+ * handle background processing of incoming call packets and ACK / abort
+ * generation
+ */
+void rxrpc_process_call(struct work_struct *work)
+{
+ struct rxrpc_call *call =
+ container_of(work, struct rxrpc_call, processor);
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_ackinfo ackinfo;
+ struct rxrpc_header hdr;
+ struct msghdr msg;
+ struct kvec iov[5];
+ unsigned long bits;
+ __be32 data, pad;
+ size_t len;
+ int genbit, loop, nbit, ioc, ret, mtu;
+ u32 abort_code = RX_PROTOCOL_ERROR;
+ u8 *acks = NULL;
+
+ //printk("\n--------------------\n");
+ _enter("{%d,%s,%lx} [%lu]",
+ call->debug_id, rxrpc_call_states[call->state], call->events,
+ (jiffies - call->creation_jif) / (HZ / 10));
+
+ if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
+ _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
+ return;
+ }
+
+ /* there's a good chance we're going to have to send a message, so set
+ * one up in advance */
+ msg.msg_name = &call->conn->trans->peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(call->conn->trans->peer->srx.transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ hdr.epoch = call->conn->epoch;
+ hdr.cid = call->cid;
+ hdr.callNumber = call->call_id;
+ hdr.seq = 0;
+ hdr.type = RXRPC_PACKET_TYPE_ACK;
+ hdr.flags = call->conn->out_clientflag;
+ hdr.userStatus = 0;
+ hdr.securityIndex = call->conn->security_ix;
+ hdr._rsvd = 0;
+ hdr.serviceId = call->conn->service_id;
+
+ memset(iov, 0, sizeof(iov));
+ iov[0].iov_base = &hdr;
+ iov[0].iov_len = sizeof(hdr);
+
+ /* deal with events of a final nature */
+ if (test_bit(RXRPC_CALL_RELEASE, &call->events)) {
+ rxrpc_release_call(call);
+ clear_bit(RXRPC_CALL_RELEASE, &call->events);
+ }
+
+ if (test_bit(RXRPC_CALL_RCVD_ERROR, &call->events)) {
+ int error;
+
+ clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+ clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
+ clear_bit(RXRPC_CALL_ABORT, &call->events);
+
+ error = call->conn->trans->peer->net_error;
+ _debug("post net error %d", error);
+
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR,
+ error, true) < 0)
+ goto no_mem;
+ clear_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
+ goto kill_ACKs;
+ }
+
+ if (test_bit(RXRPC_CALL_CONN_ABORT, &call->events)) {
+ ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
+
+ clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
+ clear_bit(RXRPC_CALL_ABORT, &call->events);
+
+ _debug("post conn abort");
+
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+ call->conn->error, true) < 0)
+ goto no_mem;
+ clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+ goto kill_ACKs;
+ }
+
+ if (test_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) {
+ hdr.type = RXRPC_PACKET_TYPE_BUSY;
+ genbit = RXRPC_CALL_REJECT_BUSY;
+ goto send_message;
+ }
+
+ if (test_bit(RXRPC_CALL_ABORT, &call->events)) {
+ ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
+
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+ ECONNABORTED, true) < 0)
+ goto no_mem;
+ hdr.type = RXRPC_PACKET_TYPE_ABORT;
+ data = htonl(call->abort_code);
+ iov[1].iov_base = &data;
+ iov[1].iov_len = sizeof(data);
+ genbit = RXRPC_CALL_ABORT;
+ goto send_message;
+ }
+
+ if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) {
+ genbit = RXRPC_CALL_ACK_FINAL;
+
+ ack.bufferSpace = htons(8);
+ ack.maxSkew = 0;
+ ack.serial = 0;
+ ack.reason = RXRPC_ACK_IDLE;
+ ack.nAcks = 0;
+ call->ackr_reason = 0;
+
+ spin_lock_bh(&call->lock);
+ ack.serial = call->ackr_serial;
+ ack.previousPacket = call->ackr_prev_seq;
+ ack.firstPacket = htonl(call->rx_data_eaten + 1);
+ spin_unlock_bh(&call->lock);
+
+ pad = 0;
+
+ iov[1].iov_base = &ack;
+ iov[1].iov_len = sizeof(ack);
+ iov[2].iov_base = &pad;
+ iov[2].iov_len = 3;
+ iov[3].iov_base = &ackinfo;
+ iov[3].iov_len = sizeof(ackinfo);
+ goto send_ACK;
+ }
+
+ if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) |
+ (1 << RXRPC_CALL_RCVD_ABORT))
+ ) {
+ u32 mark;
+
+ if (test_bit(RXRPC_CALL_RCVD_ABORT, &call->events))
+ mark = RXRPC_SKB_MARK_REMOTE_ABORT;
+ else
+ mark = RXRPC_SKB_MARK_BUSY;
+
+ _debug("post abort/busy");
+ rxrpc_clear_tx_window(call);
+ if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
+ goto no_mem;
+
+ clear_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
+ clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+ goto kill_ACKs;
+ }
+
+ if (test_and_clear_bit(RXRPC_CALL_RCVD_ACKALL, &call->events)) {
+ _debug("do implicit ackall");
+ rxrpc_clear_tx_window(call);
+ }
+
+ if (test_bit(RXRPC_CALL_LIFE_TIMER, &call->events)) {
+ write_lock_bh(&call->state_lock);
+ if (call->state <= RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = RX_CALL_TIMEOUT;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ }
+ write_unlock_bh(&call->state_lock);
+
+ _debug("post timeout");
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+ ETIME, true) < 0)
+ goto no_mem;
+
+ clear_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
+ goto kill_ACKs;
+ }
+
+ /* deal with assorted inbound messages */
+ if (!skb_queue_empty(&call->rx_queue)) {
+ switch (rxrpc_process_rx_queue(call, &abort_code)) {
+ case 0:
+ case -EAGAIN:
+ break;
+ case -ENOMEM:
+ goto no_mem;
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EPROTO:
+ rxrpc_abort_call(call, abort_code);
+ goto kill_ACKs;
+ }
+ }
+
+ /* handle resending */
+ if (test_and_clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+ rxrpc_resend_timer(call);
+ if (test_and_clear_bit(RXRPC_CALL_RESEND, &call->events))
+ rxrpc_resend(call);
+
+ /* consider sending an ordinary ACK */
+ if (test_bit(RXRPC_CALL_ACK, &call->events)) {
+ _debug("send ACK: window: %d - %d { %lx }",
+ call->rx_data_eaten, call->ackr_win_top,
+ call->ackr_window[0]);
+
+ if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
+ /* ACK by sending reply DATA packet in this state */
+ clear_bit(RXRPC_CALL_ACK, &call->events);
+ goto maybe_reschedule;
+ }
+
+ genbit = RXRPC_CALL_ACK;
+
+ acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
+ GFP_NOFS);
+ if (!acks)
+ goto no_mem;
+
+ //hdr.flags = RXRPC_SLOW_START_OK;
+ ack.bufferSpace = htons(8);
+ ack.maxSkew = 0;
+ ack.serial = 0;
+ ack.reason = 0;
+
+ spin_lock_bh(&call->lock);
+ ack.reason = call->ackr_reason;
+ ack.serial = call->ackr_serial;
+ ack.previousPacket = call->ackr_prev_seq;
+ ack.firstPacket = htonl(call->rx_data_eaten + 1);
+
+ ack.nAcks = 0;
+ for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
+ nbit = loop * BITS_PER_LONG;
+ for (bits = call->ackr_window[loop]; bits; bits >>= 1
+ ) {
+ _debug("- l=%d n=%d b=%lx", loop, nbit, bits);
+ if (bits & 1) {
+ acks[nbit] = RXRPC_ACK_TYPE_ACK;
+ ack.nAcks = nbit + 1;
+ }
+ nbit++;
+ }
+ }
+ call->ackr_reason = 0;
+ spin_unlock_bh(&call->lock);
+
+ pad = 0;
+
+ iov[1].iov_base = &ack;
+ iov[1].iov_len = sizeof(ack);
+ iov[2].iov_base = acks;
+ iov[2].iov_len = ack.nAcks;
+ iov[3].iov_base = &pad;
+ iov[3].iov_len = 3;
+ iov[4].iov_base = &ackinfo;
+ iov[4].iov_len = sizeof(ackinfo);
+
+ switch (ack.reason) {
+ case RXRPC_ACK_REQUESTED:
+ case RXRPC_ACK_DUPLICATE:
+ case RXRPC_ACK_OUT_OF_SEQUENCE:
+ case RXRPC_ACK_EXCEEDS_WINDOW:
+ case RXRPC_ACK_NOSPACE:
+ case RXRPC_ACK_PING:
+ case RXRPC_ACK_PING_RESPONSE:
+ goto send_ACK_with_skew;
+ case RXRPC_ACK_DELAY:
+ case RXRPC_ACK_IDLE:
+ goto send_ACK;
+ }
+ }
+
+ /* handle completion of security negotiations on an incoming
+ * connection */
+ if (test_and_clear_bit(RXRPC_CALL_SECURED, &call->events)) {
+ _debug("secured");
+ spin_lock_bh(&call->lock);
+
+ if (call->state == RXRPC_CALL_SERVER_SECURING) {
+ _debug("securing");
+ write_lock(&call->conn->lock);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_bit(RXRPC_CALL_RELEASE, &call->events)) {
+ _debug("not released");
+ call->state = RXRPC_CALL_SERVER_ACCEPTING;
+ list_move_tail(&call->accept_link,
+ &call->socket->acceptq);
+ }
+ write_unlock(&call->conn->lock);
+ read_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE)
+ set_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
+ read_unlock(&call->state_lock);
+ }
+
+ spin_unlock_bh(&call->lock);
+ if (!test_bit(RXRPC_CALL_POST_ACCEPT, &call->events))
+ goto maybe_reschedule;
+ }
+
+ /* post a notification of an acceptable connection to the app */
+ if (test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) {
+ _debug("post accept");
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
+ 0, false) < 0)
+ goto no_mem;
+ clear_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
+ goto maybe_reschedule;
+ }
+
+ /* handle incoming call acceptance */
+ if (test_and_clear_bit(RXRPC_CALL_ACCEPTED, &call->events)) {
+ _debug("accepted");
+ ASSERTCMP(call->rx_data_post, ==, 0);
+ call->rx_data_post = 1;
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE)
+ set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
+ read_unlock_bh(&call->state_lock);
+ }
+
+ /* drain the out of sequence received packet queue into the packet Rx
+ * queue */
+ if (test_and_clear_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) {
+ while (call->rx_data_post == call->rx_first_oos)
+ if (rxrpc_drain_rx_oos_queue(call) < 0)
+ break;
+ goto maybe_reschedule;
+ }
+
+ /* other events may have been raised since we started checking */
+ goto maybe_reschedule;
+
+send_ACK_with_skew:
+ ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
+ ntohl(ack.serial));
+send_ACK:
+ mtu = call->conn->trans->peer->if_mtu;
+ mtu -= call->conn->trans->peer->hdrsize;
+ ackinfo.maxMTU = htonl(mtu);
+ ackinfo.rwind = htonl(32);
+
+ /* permit the peer to send us jumbo packets if it wants to */
+ ackinfo.rxMTU = htonl(5692);
+ ackinfo.jumbo_max = htonl(4);
+
+ hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
+ _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+ ntohl(hdr.serial),
+ ntohs(ack.maxSkew),
+ ntohl(ack.firstPacket),
+ ntohl(ack.previousPacket),
+ ntohl(ack.serial),
+ rxrpc_acks[ack.reason],
+ ack.nAcks);
+
+ del_timer_sync(&call->ack_timer);
+ if (ack.nAcks > 0)
+ set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags);
+ goto send_message_2;
+
+send_message:
+ _debug("send message");
+
+ hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
+ _proto("Tx %s %%%u", rxrpc_pkts[hdr.type], ntohl(hdr.serial));
+send_message_2:
+
+ len = iov[0].iov_len;
+ ioc = 1;
+ if (iov[4].iov_len) {
+ ioc = 5;
+ len += iov[4].iov_len;
+ len += iov[3].iov_len;
+ len += iov[2].iov_len;
+ len += iov[1].iov_len;
+ } else if (iov[3].iov_len) {
+ ioc = 4;
+ len += iov[3].iov_len;
+ len += iov[2].iov_len;
+ len += iov[1].iov_len;
+ } else if (iov[2].iov_len) {
+ ioc = 3;
+ len += iov[2].iov_len;
+ len += iov[1].iov_len;
+ } else if (iov[1].iov_len) {
+ ioc = 2;
+ len += iov[1].iov_len;
+ }
+
+ ret = kernel_sendmsg(call->conn->trans->local->socket,
+ &msg, iov, ioc, len);
+ if (ret < 0) {
+ _debug("sendmsg failed: %d", ret);
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_DEAD)
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+ goto error;
+ }
+
+ switch (genbit) {
+ case RXRPC_CALL_ABORT:
+ clear_bit(genbit, &call->events);
+ clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+ goto kill_ACKs;
+
+ case RXRPC_CALL_ACK_FINAL:
+ write_lock_bh(&call->state_lock);
+ if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
+ call->state = RXRPC_CALL_COMPLETE;
+ write_unlock_bh(&call->state_lock);
+ goto kill_ACKs;
+
+ default:
+ clear_bit(genbit, &call->events);
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ _debug("start ACK timer");
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
+ call->ackr_serial, false);
+ default:
+ break;
+ }
+ goto maybe_reschedule;
+ }
+
+kill_ACKs:
+ del_timer_sync(&call->ack_timer);
+ if (test_and_clear_bit(RXRPC_CALL_ACK_FINAL, &call->events))
+ rxrpc_put_call(call);
+ clear_bit(RXRPC_CALL_ACK, &call->events);
+
+maybe_reschedule:
+ if (call->events || !skb_queue_empty(&call->rx_queue)) {
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_DEAD)
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+ }
+
+ /* don't leave aborted connections on the accept queue */
+ if (call->state >= RXRPC_CALL_COMPLETE &&
+ !list_empty(&call->accept_link)) {
+ _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
+ call, call->events, call->flags,
+ ntohl(call->conn->cid));
+
+ read_lock_bh(&call->state_lock);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+ }
+
+error:
+ clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
+ kfree(acks);
+
+ /* because we don't want two CPUs both processing the work item for one
+ * call at the same time, we use a flag to note when it's busy; however
+ * this means there's a race between clearing the flag and setting the
+ * work pending bit and the work item being processed again */
+ if (call->events && !work_pending(&call->processor)) {
+ _debug("jumpstart %x", ntohl(call->conn->cid));
+ rxrpc_queue_call(call);
+ }
+
+ _leave("");
+ return;
+
+no_mem:
+ _debug("out of memory");
+ goto maybe_reschedule;
+}
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
new file mode 100644
index 000000000000..4d92d88ff1fc
--- /dev/null
+++ b/net/rxrpc/ar-call.c
@@ -0,0 +1,804 @@
+/* RxRPC individual remote procedure call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+struct kmem_cache *rxrpc_call_jar; /* slab cache that call records are allocated from and freed back to */
+LIST_HEAD(rxrpc_calls); /* list of call records, linked through call->link */
+DEFINE_RWLOCK(rxrpc_call_lock); /* write-locked (BH-safe) around the rxrpc_calls updates in this file */
+static unsigned rxrpc_call_max_lifetime = 60; /* in seconds: multiplied by HZ when arming call->lifetimer */
+static unsigned rxrpc_dead_call_timeout = 2; /* in seconds: multiplied by HZ when arming call->deadspan */
+
+static void rxrpc_destroy_call(struct work_struct *work);
+static void rxrpc_call_life_expired(unsigned long _call);
+static void rxrpc_dead_call_expired(unsigned long _call);
+static void rxrpc_ack_time_expired(unsigned long _call);
+static void rxrpc_resend_time_expired(unsigned long _call);
+
+/*
+ * Allocate a blank call record and initialise everything that does not depend
+ * on the socket or connection it will end up attached to.
+ * - gfp: allocation flags, used for both the record and its ACK window array
+ * - returns the new call with a usage count of 1 and in state
+ *   RXRPC_CALL_CLIENT_SEND_REQUEST, or NULL if either allocation fails
+ * - the four timers and the two work items are set up but not started
+ */
+static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+{
+	struct rxrpc_call *new;
+
+	new = kmem_cache_zalloc(rxrpc_call_jar, gfp);
+	if (new == NULL)
+		goto nomem;
+
+	/* the window array is a separate allocation hanging off the record */
+	new->acks_winsz = 16;
+	new->acks_window = kmalloc(new->acks_winsz * sizeof(unsigned long),
+				   gfp);
+	if (new->acks_window == NULL)
+		goto nomem_free_call;
+
+	/* locks, queues and list heads */
+	spin_lock_init(&new->lock);
+	rwlock_init(&new->state_lock);
+	init_waitqueue_head(&new->tx_waitq);
+	skb_queue_head_init(&new->rx_queue);
+	skb_queue_head_init(&new->rx_oos_queue);
+	INIT_LIST_HEAD(&new->accept_link);
+
+	/* deferred work and timers; each timer is handed the call itself */
+	INIT_WORK(&new->destroyer, &rxrpc_destroy_call);
+	INIT_WORK(&new->processor, &rxrpc_process_call);
+	setup_timer(&new->lifetimer, &rxrpc_call_life_expired,
+		    (unsigned long) new);
+	setup_timer(&new->deadspan, &rxrpc_dead_call_expired,
+		    (unsigned long) new);
+	setup_timer(&new->ack_timer, &rxrpc_ack_time_expired,
+		    (unsigned long) new);
+	setup_timer(&new->resend_timer, &rxrpc_resend_time_expired,
+		    (unsigned long) new);
+
+	/* identity and initial state */
+	atomic_set(&new->usage, 1);
+	new->debug_id = atomic_inc_return(&rxrpc_debug_id);
+	new->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+	new->creation_jif = jiffies;
+
+	/* fill the not-yet-linked tree node with a recognisable pattern */
+	memset(&new->sock_node, 0xed, sizeof(new->sock_node));
+
+	/* receive-side sequence tracking starts from nothing consumed */
+	new->rx_data_expect = 1;
+	new->rx_data_eaten = 0;
+	new->rx_first_oos = 0;
+	new->ackr_win_top = new->rx_data_eaten + 1 + RXRPC_MAXACKS;
+	return new;
+
+nomem_free_call:
+	kmem_cache_free(rxrpc_call_jar, new);
+nomem:
+	return NULL;
+}
+
+/*
+ * Allocate a new client call and attempt to get a connection slot for it.
+ * - rx, trans and bundle must all be non-NULL (asserted)
+ * - the call takes its own ref on the socket, which rxrpc_cleanup_call()
+ *   drops again when the call is finally destroyed
+ * - on success the call has been added to its peer's error_targets list and
+ *   its lifetime timer is running
+ * - returns the call, ERR_PTR(-ENOMEM) if allocation fails, or the error
+ *   from rxrpc_connect_call() wrapped in ERR_PTR()
+ */
+static struct rxrpc_call *rxrpc_alloc_client_call(
+	struct rxrpc_sock *rx,
+	struct rxrpc_transport *trans,
+	struct rxrpc_conn_bundle *bundle,
+	gfp_t gfp)
+{
+	struct rxrpc_call *call;
+	int ret;
+
+	_enter("");
+
+	ASSERT(rx != NULL);
+	ASSERT(trans != NULL);
+	ASSERT(bundle != NULL);
+
+	call = rxrpc_alloc_call(gfp);
+	if (!call)
+		return ERR_PTR(-ENOMEM);
+
+	sock_hold(&rx->sk);
+	call->socket = rx;
+	call->rx_data_post = 1;
+
+	ret = rxrpc_connect_call(rx, trans, bundle, call, gfp);
+	if (ret < 0) {
+		/* the record is freed directly rather than going through
+		 * rxrpc_cleanup_call(), so everything acquired so far has to
+		 * be undone here: the socket ref taken above and the ACK
+		 * window that rxrpc_alloc_call() allocated separately from
+		 * the record itself */
+		sock_put(&rx->sk);
+		kfree(call->acks_window);
+		kmem_cache_free(rxrpc_call_jar, call);
+		return ERR_PTR(ret);
+	}
+
+	spin_lock(&call->conn->trans->peer->lock);
+	list_add(&call->error_link, &call->conn->trans->peer->error_targets);
+	spin_unlock(&call->conn->trans->peer->lock);
+
+	/* rxrpc_call_max_lifetime is in seconds */
+	call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+	add_timer(&call->lifetimer);
+
+	_leave(" = %p", call);
+	return call;
+}
+
+/*
+ * set up a call for the given data
+ * - called in process context with IRQs enabled
+ * - looks user_call_ID up in the socket's rx->calls tree and returns the
+ *   matching call if there is one
+ * - otherwise, provided both create and trans are set, allocates a new client
+ *   call, inserts it into the tree and adds it to the rxrpc_calls list
+ * - a ref is taken with rxrpc_get_call() on whichever call is returned
+ * - returns ERR_PTR(-EBADSLT) if there is no match and create or trans is
+ *   unset, or the ERR_PTR handed back by rxrpc_alloc_client_call()
+ */
+struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
+ struct rxrpc_transport *trans,
+ struct rxrpc_conn_bundle *bundle,
+ unsigned long user_call_ID,
+ int create,
+ gfp_t gfp)
+{
+ struct rxrpc_call *call, *candidate;
+ struct rb_node *p, *parent, **pp;
+
+ _enter("%p,%d,%d,%lx,%d",
+ rx, trans ? trans->debug_id : -1, bundle ? bundle->debug_id : -1,
+ user_call_ID, create);
+
+ /* search the extant calls first for one that matches the specified
+ * user ID */
+ read_lock(&rx->call_lock);
+
+ p = rx->calls.rb_node;
+ while (p) {
+ call = rb_entry(p, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ p = p->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ p = p->rb_right;
+ else
+ goto found_extant_call;
+ }
+
+ read_unlock(&rx->call_lock);
+
+ if (!create || !trans)
+ return ERR_PTR(-EBADSLT);
+
+ /* not yet present - create a candidate for a new record and then
+ * redo the search; the allocation is done with no lock held, so the
+ * second search (under the write lock) catches the same ID having
+ * been inserted in the meantime */
+ candidate = rxrpc_alloc_client_call(rx, trans, bundle, gfp);
+ if (IS_ERR(candidate)) {
+ _leave(" = %ld", PTR_ERR(candidate));
+ return candidate;
+ }
+
+ candidate->user_call_ID = user_call_ID;
+ __set_bit(RXRPC_CALL_HAS_USERID, &candidate->flags);
+
+ write_lock(&rx->call_lock);
+
+ pp = &rx->calls.rb_node; /* pp tracks the link slot a new node would be hung from */
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ pp = &(*pp)->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_second;
+ }
+
+ /* second search also failed; add the new call */
+ call = candidate;
+ candidate = NULL;
+ rxrpc_get_call(call); /* extra ref, on top of the one the record was allocated with */
+
+ rb_link_node(&call->sock_node, parent, pp);
+ rb_insert_color(&call->sock_node, &rx->calls);
+ write_unlock(&rx->call_lock);
+
+ write_lock_bh(&rxrpc_call_lock);
+ list_add_tail(&call->link, &rxrpc_calls);
+ write_unlock_bh(&rxrpc_call_lock);
+
+ _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+ _leave(" = %p [new]", call);
+ return call;
+
+ /* we found the call in the list immediately */
+found_extant_call:
+ rxrpc_get_call(call);
+ read_unlock(&rx->call_lock);
+ _leave(" = %p [extant %d]", call, atomic_read(&call->usage));
+ return call;
+
+ /* we found the call on the second time through the list */
+found_extant_second:
+ rxrpc_get_call(call);
+ write_unlock(&rx->call_lock);
+ rxrpc_put_call(candidate); /* the candidate lost the race; drop the ref it was allocated with */
+ _leave(" = %p [second %d]", call, atomic_read(&call->usage));
+ return call;
+}
+
+/*
+ * Set up an incoming call.
+ * - called in process context with IRQs enabled
+ * - a candidate call record is allocated up front and filled in from the
+ *   packet header; under conn->lock it is then either installed on the
+ *   connection or thrown away, depending on what the channel slot and the
+ *   connection's call tree already hold
+ * - returns:
+ *	the new call		if the candidate was installed
+ *	the extant call		(with a ref taken) if the channel already holds
+ *				a live call with this call number
+ *	ERR_PTR(-EBUSY)		if allocation fails, or the channel is occupied
+ *				by a different call that is not yet complete
+ *	ERR_PTR(-ECONNABORTED)	if the call already on the channel was aborted
+ *	ERR_PTR(-ECONNRESET)	if the call number is already present in the
+ *				connection's call tree
+ * - every path that discards the candidate frees the ACK window that
+ *   rxrpc_alloc_call() allocated for it as well as the record itself
+ */
+struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
+				       struct rxrpc_connection *conn,
+				       struct rxrpc_header *hdr,
+				       gfp_t gfp)
+{
+	struct rxrpc_call *call, *candidate;
+	struct rb_node **p, *parent;
+	__be32 call_id;
+
+	_enter(",%d,,%x", conn->debug_id, gfp);
+
+	ASSERT(rx != NULL);
+
+	candidate = rxrpc_alloc_call(gfp);
+	if (!candidate)
+		return ERR_PTR(-EBUSY);
+
+	candidate->socket = rx;
+	candidate->conn = conn;
+	candidate->cid = hdr->cid;
+	candidate->call_id = hdr->callNumber;
+	candidate->channel = ntohl(hdr->cid) & RXRPC_CHANNELMASK;
+	candidate->rx_data_post = 0;
+	candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
+	if (conn->security_ix > 0)
+		candidate->state = RXRPC_CALL_SERVER_SECURING;
+
+	write_lock_bh(&conn->lock);
+
+	/* set the channel for this call */
+	call = conn->channels[candidate->channel];
+	_debug("channel[%u] is %p", candidate->channel, call);
+	if (call && call->call_id == hdr->callNumber) {
+		/* already set; must've been a duplicate packet */
+		_debug("extant call [%d]", call->state);
+		ASSERTCMP(call->conn, ==, conn);
+
+		read_lock(&call->state_lock);
+		switch (call->state) {
+		case RXRPC_CALL_LOCALLY_ABORTED:
+			if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+				rxrpc_queue_call(call);
+			/* fall through */
+		case RXRPC_CALL_REMOTELY_ABORTED:
+			read_unlock(&call->state_lock);
+			goto aborted_call;
+		default:
+			rxrpc_get_call(call);
+			read_unlock(&call->state_lock);
+			goto extant_call;
+		}
+	}
+
+	if (call) {
+		/* it seems the channel is still in use from the previous call
+		 * - ditch the old binding if its call is now complete */
+		_debug("CALL: %u { %s }",
+		       call->debug_id, rxrpc_call_states[call->state]);
+
+		if (call->state >= RXRPC_CALL_COMPLETE) {
+			conn->channels[call->channel] = NULL;
+		} else {
+			write_unlock_bh(&conn->lock);
+			kfree(candidate->acks_window);
+			kmem_cache_free(rxrpc_call_jar, candidate);
+			_leave(" = -EBUSY");
+			return ERR_PTR(-EBUSY);
+		}
+	}
+
+	/* check the call number isn't duplicate */
+	_debug("check dup");
+	call_id = hdr->callNumber;
+	p = &conn->calls.rb_node;
+	parent = NULL;
+	while (*p) {
+		parent = *p;
+		call = rb_entry(parent, struct rxrpc_call, conn_node);
+
+		if (call_id < call->call_id)
+			p = &(*p)->rb_left;
+		else if (call_id > call->call_id)
+			p = &(*p)->rb_right;
+		else
+			goto old_call;
+	}
+
+	/* make the call available; from here on the call owns a ref on the
+	 * socket and one on the connection */
+	_debug("new call");
+	call = candidate;
+	candidate = NULL;
+	rb_link_node(&call->conn_node, parent, p);
+	rb_insert_color(&call->conn_node, &conn->calls);
+	conn->channels[call->channel] = call;
+	sock_hold(&rx->sk);
+	atomic_inc(&conn->usage);
+	write_unlock_bh(&conn->lock);
+
+	spin_lock(&conn->trans->peer->lock);
+	list_add(&call->error_link, &conn->trans->peer->error_targets);
+	spin_unlock(&conn->trans->peer->lock);
+
+	write_lock_bh(&rxrpc_call_lock);
+	list_add_tail(&call->link, &rxrpc_calls);
+	write_unlock_bh(&rxrpc_call_lock);
+
+	_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+	/* rxrpc_call_max_lifetime is in seconds */
+	call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+	add_timer(&call->lifetimer);
+	_leave(" = %p {%d} [new]", call, call->debug_id);
+	return call;
+
+	/* the exits below all discard the unused candidate: no socket or
+	 * connection ref has been taken on its behalf at this point, so only
+	 * its two allocations need releasing */
+extant_call:
+	write_unlock_bh(&conn->lock);
+	kfree(candidate->acks_window);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+	_leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
+	return call;
+
+aborted_call:
+	write_unlock_bh(&conn->lock);
+	kfree(candidate->acks_window);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+	_leave(" = -ECONNABORTED");
+	return ERR_PTR(-ECONNABORTED);
+
+old_call:
+	write_unlock_bh(&conn->lock);
+	kfree(candidate->acks_window);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+	_leave(" = -ECONNRESET [old]");
+	return ERR_PTR(-ECONNRESET);
+}
+
+/*
+ * Look up an extant server call by its user-assigned ID.
+ * - called in process context with IRQs enabled
+ * - walks the socket's rx->calls tree under the read side of rx->call_lock
+ * - returns the matching call with a ref taken on it, or NULL if no call on
+ *   this socket carries that ID
+ */
+struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *rx,
+					  unsigned long user_call_ID)
+{
+	struct rxrpc_call *found = NULL, *cursor;
+	struct rb_node *node;
+
+	_enter("%p,%lx", rx, user_call_ID);
+
+	read_lock(&rx->call_lock);
+
+	/* descend the tree until the ID matches or we fall off a leaf */
+	for (node = rx->calls.rb_node; node; ) {
+		cursor = rb_entry(node, struct rxrpc_call, sock_node);
+
+		if (user_call_ID == cursor->user_call_ID) {
+			found = cursor;
+			break;
+		}
+
+		node = (user_call_ID < cursor->user_call_ID) ?
+			node->rb_left : node->rb_right;
+	}
+
+	if (!found) {
+		read_unlock(&rx->call_lock);
+		_leave(" = NULL");
+		return NULL;
+	}
+
+	/* pin the call before letting go of the lock */
+	rxrpc_get_call(found);
+	read_unlock(&rx->call_lock);
+	_leave(" = %p [%d]", found, atomic_read(&found->usage));
+	return found;
+}
+
+/*
+ * detach a call from a socket and set up for release
+ * - must be called at most once per call: finding RXRPC_CALL_RELEASED already
+ *   set is treated as a BUG()
+ * - unhooks the call from the socket (accept queue or user-ID tree), frees
+ *   its connection channel slot, aborts it if it is still in progress, purges
+ *   both Rx queues, stops the resend/ACK/lifetime timers and finally starts
+ *   the deadspan timer
+ */
+void rxrpc_release_call(struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_sock *rx = call->socket;
+
+ _enter("{%d,%d,%d,%d}",
+ call->debug_id, atomic_read(&call->usage),
+ atomic_read(&call->ackr_not_idle),
+ call->rx_first_oos);
+
+ spin_lock_bh(&call->lock);
+ if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
+ BUG();
+ spin_unlock_bh(&call->lock);
+
+ /* dissociate from the socket
+ * - the socket's ref on the call is passed to the death timer
+ */
+ _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+
+ write_lock_bh(&rx->call_lock);
+ if (!list_empty(&call->accept_link)) {
+ _debug("unlinking once-pending call %p { e=%lx f=%lx }",
+ call, call->events, call->flags);
+ ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+ list_del_init(&call->accept_link);
+ sk_acceptq_removed(&rx->sk);
+ } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ rb_erase(&call->sock_node, &rx->calls);
+ memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); /* overwrite the unlinked node with a recognisable pattern */
+ clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+ }
+ write_unlock_bh(&rx->call_lock);
+
+ /* free up the channel for reuse */
+ spin_lock(&conn->trans->client_lock);
+ write_lock_bh(&conn->lock);
+ write_lock(&call->state_lock);
+
+ if (conn->channels[call->channel] == call)
+ conn->channels[call->channel] = NULL;
+
+ if (conn->out_clientflag && conn->bundle) {
+ conn->avail_calls++;
+ switch (conn->avail_calls) {
+ case 1: /* first free channel: move the conn to the bundle's avail_conns list */
+ list_move_tail(&conn->bundle_link,
+ &conn->bundle->avail_conns);
+ case 2 ... RXRPC_MAXCALLS - 1: /* case 1 falls through to share this assertion */
+ ASSERT(conn->channels[0] == NULL ||
+ conn->channels[1] == NULL ||
+ conn->channels[2] == NULL ||
+ conn->channels[3] == NULL);
+ break;
+ case RXRPC_MAXCALLS: /* every channel free: move the conn to the bundle's unused_conns list */
+ list_move_tail(&conn->bundle_link,
+ &conn->bundle->unused_conns);
+ ASSERT(conn->channels[0] == NULL &&
+ conn->channels[1] == NULL &&
+ conn->channels[2] == NULL &&
+ conn->channels[3] == NULL);
+ break;
+ default:
+ printk(KERN_ERR "RxRPC: conn->avail_calls=%d\n",
+ conn->avail_calls);
+ BUG();
+ }
+ }
+
+ spin_unlock(&conn->trans->client_lock); /* released first; conn->lock and state_lock are still held */
+
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
+ _debug("+++ ABORTING STATE %d +++\n", call->state);
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = RX_CALL_DEAD;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+ write_unlock(&call->state_lock);
+ write_unlock_bh(&conn->lock);
+
+ /* clean up the Rx queue */
+ if (!skb_queue_empty(&call->rx_queue) ||
+ !skb_queue_empty(&call->rx_oos_queue)) {
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+
+ _debug("purge Rx queues");
+
+ spin_lock_bh(&call->lock);
+ while ((skb = skb_dequeue(&call->rx_queue)) ||
+ (skb = skb_dequeue(&call->rx_oos_queue))) {
+ sp = rxrpc_skb(skb);
+ if (sp->call) {
+ ASSERTCMP(sp->call, ==, call);
+ rxrpc_put_call(call);
+ sp->call = NULL;
+ }
+ skb->destructor = NULL;
+ spin_unlock_bh(&call->lock); /* call->lock is dropped while the packet is logged and freed */
+
+ _debug("- zap %s %%%u #%u",
+ rxrpc_pkts[sp->hdr.type],
+ ntohl(sp->hdr.serial),
+ ntohl(sp->hdr.seq));
+ rxrpc_free_skb(skb);
+ spin_lock_bh(&call->lock);
+ }
+ spin_unlock_bh(&call->lock);
+
+ ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE);
+ }
+
+ del_timer_sync(&call->resend_timer);
+ del_timer_sync(&call->ack_timer);
+ del_timer_sync(&call->lifetimer);
+ call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ; /* rxrpc_dead_call_timeout is in seconds */
+ add_timer(&call->deadspan);
+
+ _leave("");
+}
+
+/*
+ * Deadspan timer handler: the call has sat out its post-release grace period
+ * and may now be reaped.
+ * - _call is the call pointer that was registered with the timer
+ * - marks the call RXRPC_CALL_DEAD under the state lock and then drops one
+ *   ref on it
+ */
+static void rxrpc_dead_call_expired(unsigned long _call)
+{
+	struct rxrpc_call *corpse = (struct rxrpc_call *) _call;
+
+	_enter("{%d}", corpse->debug_id);
+
+	write_lock_bh(&corpse->state_lock);
+	corpse->state = RXRPC_CALL_DEAD;
+	write_unlock_bh(&corpse->state_lock);
+
+	rxrpc_put_call(corpse);
+}
+
+/*
+ * Flag a call for release, aborting it first if it is still in progress.
+ * - called with softirqs disabled
+ * - a call that has already reached RXRPC_CALL_DEAD is left untouched
+ * - the call is queued for processing only if this invocation was the one to
+ *   raise the ABORT or the RELEASE event
+ */
+static void rxrpc_mark_call_released(struct rxrpc_call *call)
+{
+	bool need_queue = false;
+
+	write_lock(&call->state_lock);
+
+	if (call->state >= RXRPC_CALL_DEAD)
+		goto out_unlock;
+
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		/* still live: turn it into a local abort */
+		_debug("abort call %p", call);
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = RX_CALL_DEAD;
+		if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+			need_queue = true;
+	}
+
+	if (!test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+		need_queue = true;
+
+	if (need_queue)
+		rxrpc_queue_call(call);
+
+out_unlock:
+	write_unlock(&call->state_lock);
+}
+
+/*
+ * Mark every call associated with a socket for release.
+ * - covers the calls in the socket's user-ID tree and those still waiting on
+ *   its secureq and acceptq lists
+ * - the whole sweep runs under the read side of rx->call_lock with softirqs
+ *   disabled
+ */
+void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
+{
+	struct rxrpc_call *victim;
+	struct rb_node *node;
+
+	_enter("%p", rx);
+
+	read_lock_bh(&rx->call_lock);
+
+	/* calls that have a user ID: no more incoming packets wanted */
+	node = rb_first(&rx->calls);
+	while (node) {
+		victim = rb_entry(node, struct rxrpc_call, sock_node);
+		rxrpc_mark_call_released(victim);
+		node = rb_next(node);
+	}
+
+	/* incoming calls that were never accepted: first those on the
+	 * secureq list, then those on the acceptq list */
+	list_for_each_entry(victim, &rx->secureq, accept_link)
+		rxrpc_mark_call_released(victim);
+
+	list_for_each_entry(victim, &rx->acceptq, accept_link)
+		rxrpc_mark_call_released(victim);
+
+	read_unlock_bh(&rx->call_lock);
+	_leave("");
+}
+
+/*
+ * Drop a ref on a call.
+ * - call must be non-NULL and must still have a positive usage count
+ * - when the last ref goes, the call is required to be in state
+ *   RXRPC_CALL_DEAD and its destroyer work item is queued
+ */
+void __rxrpc_put_call(struct rxrpc_call *call)
+{
+	ASSERT(call != NULL);
+
+	_enter("%p{u=%d}", call, atomic_read(&call->usage));
+
+	ASSERTCMP(atomic_read(&call->usage), >, 0);
+
+	if (!atomic_dec_and_test(&call->usage)) {
+		/* other holders remain */
+		_leave("");
+		return;
+	}
+
+	_debug("call %d dead", call->debug_id);
+	ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+	rxrpc_queue_work(&call->destroyer);
+	_leave("");
+}
+
+/*
+ * clean up a call
+ * - stops all four timers, then requires that the call has been released and
+ *   has no events outstanding
+ * - if the call's processor work item is still pending, the destroyer is
+ *   requeued and cleanup is retried later
+ * - otherwise detaches the call from its peer and connection, frees whatever
+ *   is left in the Tx ACK window and the Rx queue, drops the call's socket ref
+ *   and returns the record to rxrpc_call_jar
+ */
+static void rxrpc_cleanup_call(struct rxrpc_call *call)
+{
+ _net("DESTROY CALL %d", call->debug_id);
+
+ ASSERT(call->socket);
+
+ memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); /* overwrite the tree node with a recognisable pattern */
+
+ del_timer_sync(&call->lifetimer);
+ del_timer_sync(&call->deadspan);
+ del_timer_sync(&call->ack_timer);
+ del_timer_sync(&call->resend_timer);
+
+ ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
+ ASSERTCMP(call->events, ==, 0);
+ if (work_pending(&call->processor)) {
+ _debug("defer destroy");
+ rxrpc_queue_work(&call->destroyer); /* try again once the pending processor work has had a chance to run */
+ return;
+ }
+
+ if (call->conn) {
+ spin_lock(&call->conn->trans->peer->lock);
+ list_del(&call->error_link);
+ spin_unlock(&call->conn->trans->peer->lock);
+
+ write_lock_bh(&call->conn->lock);
+ rb_erase(&call->conn_node, &call->conn->calls);
+ write_unlock_bh(&call->conn->lock);
+ rxrpc_put_connection(call->conn);
+ }
+
+ if (call->acks_window) {
+ _debug("kill Tx window %d",
+ CIRC_CNT(call->acks_head, call->acks_tail,
+ call->acks_winsz));
+ smp_mb();
+ while (CIRC_CNT(call->acks_head, call->acks_tail,
+ call->acks_winsz) > 0) {
+ struct rxrpc_skb_priv *sp;
+ unsigned long _skb;
+
+ _skb = call->acks_window[call->acks_tail] & ~1; /* bit 0 is masked off before the slot value is used as an sk_buff pointer */
+ sp = rxrpc_skb((struct sk_buff *) _skb);
+ _debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
+ rxrpc_free_skb((struct sk_buff *) _skb);
+ call->acks_tail =
+ (call->acks_tail + 1) & (call->acks_winsz - 1); /* wrap by masking - relies on acks_winsz being a power of two */
+ }
+
+ kfree(call->acks_window);
+ }
+
+ rxrpc_free_skb(call->tx_pending); /* NOTE(review): tx_pending is not checked for NULL here - confirm rxrpc_free_skb() accepts NULL */
+
+ rxrpc_purge_queue(&call->rx_queue);
+ ASSERT(skb_queue_empty(&call->rx_oos_queue));
+ sock_put(&call->socket->sk);
+ kmem_cache_free(rxrpc_call_jar, call);
+}
+
+/*
+ * Work item handler that destroys a call.
+ * - work is the destroyer member embedded in the call being destroyed
+ * - the call must already be in state RXRPC_CALL_DEAD
+ * - takes the call off the global rxrpc_calls list and then hands it to
+ *   rxrpc_cleanup_call()
+ */
+static void rxrpc_destroy_call(struct work_struct *work)
+{
+	struct rxrpc_call *doomed;
+
+	doomed = container_of(work, struct rxrpc_call, destroyer);
+
+	_enter("%p{%d,%d,%p}",
+	       doomed, atomic_read(&doomed->usage), doomed->channel,
+	       doomed->conn);
+
+	ASSERTCMP(doomed->state, ==, RXRPC_CALL_DEAD);
+
+	/* unlink from the global list before tearing anything down */
+	write_lock_bh(&rxrpc_call_lock);
+	list_del_init(&doomed->link);
+	write_unlock_bh(&rxrpc_call_lock);
+
+	rxrpc_cleanup_call(doomed);
+	_leave("");
+}
+
+/*
+ * preemptively destroy all the call records from a transport endpoint rather
+ * than waiting for them to time out
+ * - repeatedly takes the first entry off rxrpc_calls until the list is empty
+ * - rxrpc_call_lock is dropped around cond_resched() between entries
+ * - a call whose usage count and state are not what is expected is reported
+ *   with printk(KERN_ERR) rather than being waited for
+ */
+void __exit rxrpc_destroy_all_calls(void)
+{
+ struct rxrpc_call *call;
+
+ _enter("");
+ write_lock_bh(&rxrpc_call_lock);
+
+ while (!list_empty(&rxrpc_calls)) {
+ call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
+ _debug("Zapping call %p", call);
+
+ list_del_init(&call->link);
+
+ switch (atomic_read(&call->usage)) {
+ case 0:
+ ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+ break;
+ case 1:
+ if (del_timer_sync(&call->deadspan) != 0 &&
+ call->state != RXRPC_CALL_DEAD)
+ rxrpc_dead_call_expired((unsigned long) call); /* the timer was still pending: run its handler by hand */
+ if (call->state != RXRPC_CALL_DEAD)
+ break; /* NOTE(review): a call that IS dead at this point falls through into the "still in use" report below - confirm that is intended */
+ default:
+ printk(KERN_ERR "RXRPC:"
+ " Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
+ call, atomic_read(&call->usage),
+ atomic_read(&call->ackr_not_idle),
+ rxrpc_call_states[call->state],
+ call->flags, call->events);
+ if (!skb_queue_empty(&call->rx_queue))
+ printk(KERN_ERR"RXRPC: Rx queue occupied\n");
+ if (!skb_queue_empty(&call->rx_oos_queue))
+ printk(KERN_ERR"RXRPC: OOS queue occupied\n");
+ break;
+ }
+
+ write_unlock_bh(&rxrpc_call_lock);
+ cond_resched();
+ write_lock_bh(&rxrpc_call_lock);
+ }
+
+ write_unlock_bh(&rxrpc_call_lock);
+ _leave("");
+}
+
+/*
+ * Lifetime timer handler: the call has been around longer than allowed.
+ * - _call is the call pointer that was registered with the timer
+ * - does nothing for a call that has already reached RXRPC_CALL_COMPLETE or
+ *   beyond; otherwise raises RXRPC_CALL_LIFE_TIMER and queues the call
+ */
+static void rxrpc_call_life_expired(unsigned long _call)
+{
+	struct rxrpc_call *expired = (struct rxrpc_call *) _call;
+
+	/* unlocked peek: a finished call needs no lifetime event */
+	if (expired->state < RXRPC_CALL_COMPLETE) {
+		_enter("{%d}", expired->debug_id);
+
+		/* look again under the state lock before raising the event */
+		read_lock_bh(&expired->state_lock);
+		if (!(expired->state >= RXRPC_CALL_COMPLETE)) {
+			set_bit(RXRPC_CALL_LIFE_TIMER, &expired->events);
+			rxrpc_queue_call(expired);
+		}
+		read_unlock_bh(&expired->state_lock);
+	}
+}
+
+/*
+ * Resend timer handler.
+ * - _call is the call pointer that was registered with the timer
+ * - ignored once the call has reached RXRPC_CALL_COMPLETE or beyond
+ * - otherwise clears RXRPC_CALL_RUN_RTIMER and, if the call is still
+ *   incomplete and RXRPC_CALL_RESEND_TIMER was not already raised, raises it
+ *   and queues the call
+ */
+static void rxrpc_resend_time_expired(unsigned long _call)
+{
+	struct rxrpc_call *target = (struct rxrpc_call *) _call;
+
+	_enter("{%d}", target->debug_id);
+
+	if (!(target->state < RXRPC_CALL_COMPLETE))
+		return;
+
+	read_lock_bh(&target->state_lock);
+
+	clear_bit(RXRPC_CALL_RUN_RTIMER, &target->flags);
+
+	/* the event bit is only touched if the call is still incomplete */
+	if (target->state < RXRPC_CALL_COMPLETE) {
+		if (!test_and_set_bit(RXRPC_CALL_RESEND_TIMER,
+				      &target->events))
+			rxrpc_queue_call(target);
+	}
+
+	read_unlock_bh(&target->state_lock);
+}
+
+/*
+ * ACK timer handler.
+ * - _call is the call pointer that was registered with the timer
+ * - ignored once the call has reached RXRPC_CALL_COMPLETE or beyond
+ * - otherwise raises RXRPC_CALL_ACK and queues the call, unless that event
+ *   was already raised
+ */
+static void rxrpc_ack_time_expired(unsigned long _call)
+{
+	struct rxrpc_call *target = (struct rxrpc_call *) _call;
+
+	_enter("{%d}", target->debug_id);
+
+	if (!(target->state < RXRPC_CALL_COMPLETE))
+		return;
+
+	read_lock_bh(&target->state_lock);
+
+	/* the event bit is only touched if the call is still incomplete */
+	if (target->state < RXRPC_CALL_COMPLETE) {
+		if (!test_and_set_bit(RXRPC_CALL_ACK, &target->events))
+			rxrpc_queue_call(target);
+	}
+
+	read_unlock_bh(&target->state_lock);
+}
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
new file mode 100644
index 000000000000..43cb3e051ece
--- /dev/null
+++ b/net/rxrpc/ar-connection.c
@@ -0,0 +1,911 @@
+/* RxRPC virtual connection handler
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static void rxrpc_connection_reaper(struct work_struct *work);
+
+LIST_HEAD(rxrpc_connections); /* every connection record; walked by the reaper */
+DEFINE_RWLOCK(rxrpc_connection_lock); /* guards rxrpc_connections */
+static unsigned long rxrpc_connection_timeout = 10 * 60; /* seconds an unused connection lingers after its last put */
+static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); /* delayed work item that runs the reaper */
+
+/*
+ * allocate and initialise a new client connection bundle
+ * - the record is zeroed, its three connection lists and wait queue are set
+ *   up and its usage count starts at 1
+ * - returns NULL if the allocation fails
+ */
+static struct rxrpc_conn_bundle *rxrpc_alloc_bundle(gfp_t gfp)
+{
+	struct rxrpc_conn_bundle *fresh;
+
+	_enter("");
+
+	fresh = kzalloc(sizeof(*fresh), gfp);
+	if (!fresh)
+		goto out;
+
+	atomic_set(&fresh->usage, 1);
+	init_waitqueue_head(&fresh->chanwait);
+	INIT_LIST_HEAD(&fresh->unused_conns);
+	INIT_LIST_HEAD(&fresh->avail_conns);
+	INIT_LIST_HEAD(&fresh->busy_conns);
+
+out:
+	_leave(" = %p", fresh);
+	return fresh;
+}
+
+/*
+ * compare bundle parameters with what we're looking for
+ * - return -ve, 0 or +ve
+ * - the service ID is compared first and the key pointer second
+ * - the key pointers are compared three-way rather than subtracted: the
+ *   difference of two unsigned longs does not fit in an int, so truncating it
+ *   could report distinct keys as equal or give the wrong sign
+ */
+static inline
+int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
+		     struct key *key, __be16 service_id)
+{
+	unsigned long have = (unsigned long) bundle->key;
+	unsigned long want = (unsigned long) key;
+	int diff;
+
+	/* 16-bit operands are promoted to int, so this cannot overflow */
+	diff = bundle->service_id - service_id;
+	if (diff)
+		return diff;
+
+	return (have > want) - (have < want);
+}
+
+/*
+ * get bundle of client connections that a client socket can make use of
+ * - the bundles on a transport live in an rb-tree ordered by
+ *   rxrpc_cmp_bundle() (service ID, then key)
+ * - if the socket already has a bundle cached for this transport, that is
+ *   returned directly
+ * - otherwise the tree is searched; on a miss a candidate is allocated with
+ *   the lock dropped and the search is repeated before inserting it
+ * - returns the bundle with a reference held for the caller, or
+ *   ERR_PTR(-ENOMEM) if a new bundle could not be allocated
+ */
+struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
+					   struct rxrpc_transport *trans,
+					   struct key *key,
+					   __be16 service_id,
+					   gfp_t gfp)
+{
+	struct rxrpc_conn_bundle *bundle, *candidate;
+	struct rb_node *p, *parent, **pp;
+
+	/* service_id is 16 bits wide, so it is converted with ntohs() */
+	_enter("%p{%x},%x,%hx,",
+	       rx, key_serial(key), trans->debug_id, ntohs(service_id));
+
+	if (rx->trans == trans && rx->bundle) {
+		atomic_inc(&rx->bundle->usage);
+		return rx->bundle;
+	}
+
+	/* search the extant bundles first for one that matches the specified
+	 * user ID */
+	spin_lock(&trans->client_lock);
+
+	p = trans->bundles.rb_node;
+	while (p) {
+		bundle = rb_entry(p, struct rxrpc_conn_bundle, node);
+
+		if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
+			p = p->rb_left;
+		else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
+			p = p->rb_right;
+		else
+			goto found_extant_bundle;
+	}
+
+	spin_unlock(&trans->client_lock);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_bundle(gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	candidate->key = key_get(key);
+	candidate->service_id = service_id;
+
+	spin_lock(&trans->client_lock);
+
+	pp = &trans->bundles.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		bundle = rb_entry(parent, struct rxrpc_conn_bundle, node);
+
+		if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
+			pp = &(*pp)->rb_left;
+		else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_second;
+	}
+
+	/* second search also failed; add the new bundle */
+	bundle = candidate;
+	candidate = NULL;
+
+	rb_link_node(&bundle->node, parent, pp);
+	rb_insert_color(&bundle->node, &trans->bundles);
+	spin_unlock(&trans->client_lock);
+	_net("BUNDLE new on trans %d", trans->debug_id);
+	/* cache the bundle on a connected client socket, with its own ref */
+	if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+		atomic_inc(&bundle->usage);
+		rx->bundle = bundle;
+	}
+	_leave(" = %p [new]", bundle);
+	return bundle;
+
+	/* we found the bundle in the list immediately */
+found_extant_bundle:
+	atomic_inc(&bundle->usage);
+	spin_unlock(&trans->client_lock);
+	_net("BUNDLE old on trans %d", trans->debug_id);
+	if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+		atomic_inc(&bundle->usage);
+		rx->bundle = bundle;
+	}
+	_leave(" = %p [extant %d]", bundle, atomic_read(&bundle->usage));
+	return bundle;
+
+	/* we found the bundle on the second time through the list */
+found_extant_second:
+	atomic_inc(&bundle->usage);
+	spin_unlock(&trans->client_lock);
+	/* the candidate lost the race and never reaches rxrpc_put_bundle(),
+	 * so the key reference it took must be dropped here too */
+	key_put(candidate->key);
+	kfree(candidate);
+	_net("BUNDLE old2 on trans %d", trans->debug_id);
+	if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+		atomic_inc(&bundle->usage);
+		rx->bundle = bundle;
+	}
+	_leave(" = %p [second %d]", bundle, atomic_read(&bundle->usage));
+	return bundle;
+}
+
+/*
+ * drop a reference on a bundle
+ * - when the last reference goes, the bundle is removed from the transport's
+ *   bundle tree under the client lock, its key reference is dropped and the
+ *   record is freed
+ * - the bundle must have no connections left by then
+ */
+void rxrpc_put_bundle(struct rxrpc_transport *trans,
+		      struct rxrpc_conn_bundle *bundle)
+{
+	_enter("%p,%p{%d}",trans, bundle, atomic_read(&bundle->usage));
+
+	/* takes client_lock only if the count drops to zero */
+	if (!atomic_dec_and_lock(&bundle->usage, &trans->client_lock))
+		goto out;
+
+	_debug("Destroy bundle");
+	rb_erase(&bundle->node, &trans->bundles);
+	spin_unlock(&trans->client_lock);
+
+	ASSERT(list_empty(&bundle->unused_conns));
+	ASSERT(list_empty(&bundle->avail_conns));
+	ASSERT(list_empty(&bundle->busy_conns));
+	ASSERTCMP(bundle->num_conns, ==, 0);
+
+	key_put(bundle->key);
+	kfree(bundle);
+
+out:
+	_leave("");
+}
+
+/*
+ * allocate a new connection
+ * - the record is zeroed and then given its work item, lists, locks, a usage
+ *   count of 1, a fresh debug ID and default channel/header parameters
+ * - returns NULL (not an ERR_PTR) if the allocation fails
+ */
+static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+{
+	struct rxrpc_connection *conn;
+
+	_enter("");
+
+	conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
+	if (conn) {
+		INIT_WORK(&conn->processor, &rxrpc_process_connection);
+		INIT_LIST_HEAD(&conn->bundle_link);
+		conn->calls = RB_ROOT;
+		skb_queue_head_init(&conn->rx_queue);
+		rwlock_init(&conn->lock);
+		spin_lock_init(&conn->state_lock);
+		atomic_set(&conn->usage, 1);
+		conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
+		conn->avail_calls = RXRPC_MAXCALLS;
+		conn->size_align = 4;
+		conn->header_size = sizeof(struct rxrpc_header);
+	}
+
+	/* conn is NULL on allocation failure, so it must not be dereferenced
+	 * for the trace message */
+	_leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
+	return conn;
+}
+
+/*
+ * assign a connection ID to a connection and add it to the transport's
+ * connection lookup tree
+ * - called with transport client lock held
+ * - takes the transport's conn_lock for writing (BH disabled) internally
+ * - the client_conns tree is keyed on epoch first, then real_conn_id
+ * - the first ID tried is the transport's conn_idcounter advanced by
+ *   RXRPC_CID_INC; if that is taken, successive IDs are tried until a gap
+ *   in the tree is found
+ * - on return conn->real_conn_id holds the chosen ID and conn->cid holds it
+ *   in network byte order
+ */
+static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
+{
+ struct rxrpc_connection *xconn;
+ struct rb_node *parent, **p;
+ __be32 epoch;
+ u32 real_conn_id;
+
+ _enter("");
+
+ epoch = conn->epoch;
+
+ write_lock_bh(&conn->trans->conn_lock);
+
+ conn->trans->conn_idcounter += RXRPC_CID_INC;
+ if (conn->trans->conn_idcounter < RXRPC_CID_INC) /* presumably the counter wrapped round */
+ conn->trans->conn_idcounter = RXRPC_CID_INC;
+ real_conn_id = conn->trans->conn_idcounter;
+
+attempt_insertion:
+ parent = NULL;
+ p = &conn->trans->client_conns.rb_node;
+
+ while (*p) {
+ parent = *p;
+ xconn = rb_entry(parent, struct rxrpc_connection, node);
+
+ if (epoch < xconn->epoch)
+ p = &(*p)->rb_left;
+ else if (epoch > xconn->epoch)
+ p = &(*p)->rb_right;
+ else if (real_conn_id < xconn->real_conn_id)
+ p = &(*p)->rb_left;
+ else if (real_conn_id > xconn->real_conn_id)
+ p = &(*p)->rb_right;
+ else
+ goto id_exists; /* same epoch and ID already in the tree */
+ }
+
+ /* we've found a suitable hole - arrange for this connection to occupy
+ * it */
+ rb_link_node(&conn->node, parent, p);
+ rb_insert_color(&conn->node, &conn->trans->client_conns);
+
+ conn->real_conn_id = real_conn_id;
+ conn->cid = htonl(real_conn_id);
+ write_unlock_bh(&conn->trans->conn_lock);
+ _leave(" [CONNID %x CID %x]", real_conn_id, ntohl(conn->cid));
+ return;
+
+ /* we found a connection with the proposed ID - walk the tree from that
+ * point looking for the next unused ID */
+id_exists:
+ for (;;) {
+ real_conn_id += RXRPC_CID_INC;
+ if (real_conn_id < RXRPC_CID_INC) { /* candidate wrapped: restart from the lowest ID */
+ real_conn_id = RXRPC_CID_INC;
+ conn->trans->conn_idcounter = real_conn_id;
+ goto attempt_insertion;
+ }
+
+ parent = rb_next(parent); /* step to the in-order successor */
+ if (!parent) /* ran off the end of the tree */
+ goto attempt_insertion;
+
+ xconn = rb_entry(parent, struct rxrpc_connection, node);
+ if (epoch < xconn->epoch || /* successor sorts after the candidate, so retry the insertion with it */
+ real_conn_id < xconn->real_conn_id)
+ goto attempt_insertion;
+ }
+}
+
+/*
+ * insert a call into a connection's tree of calls, keyed on call ID
+ * - takes the connection lock for writing (BH disabled) around the insertion
+ * - finding the call ID already present is a BUG
+ */
+static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
+				      struct rxrpc_call *call)
+{
+	struct rb_node **link, *above = NULL;
+	struct rxrpc_call *other;
+	__be32 id;
+
+	write_lock_bh(&conn->lock);
+
+	id = call->call_id;
+	link = &conn->calls.rb_node;
+	while (*link) {
+		above = *link;
+		other = rb_entry(above, struct rxrpc_call, conn_node);
+
+		if (id == other->call_id)
+			BUG();
+
+		/* smaller IDs go left, larger ones right */
+		if (id < other->call_id)
+			link = &above->rb_left;
+		else
+			link = &above->rb_right;
+	}
+
+	rb_link_node(&call->conn_node, above, link);
+	rb_insert_color(&call->conn_node, &conn->calls);
+
+	write_unlock_bh(&conn->lock);
+}
+
+/*
+ * connect a call on an exclusive connection
+ * - the socket's private connection (rx->conn) is used, and is created and
+ *   given a connection ID on first use
+ * - the call is attached to the first free channel on that connection and
+ *   added to its call-ID tree
+ * - returns 0 on success, -ENOMEM if a connection could not be allocated,
+ *   the error from rxrpc_init_client_conn_security() if that fails, or
+ *   -ENOSR if every channel is in use
+ */
+static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
+				   struct rxrpc_transport *trans,
+				   __be16 service_id,
+				   struct rxrpc_call *call,
+				   gfp_t gfp)
+{
+	struct rxrpc_connection *conn;
+	int chan, ret;
+
+	_enter("");
+
+	conn = rx->conn;
+	if (!conn) {
+		/* not yet present - create a candidate for a new connection
+		 * and then redo the check */
+		conn = rxrpc_alloc_connection(gfp);
+		/* the allocator reports failure with NULL, not an ERR_PTR */
+		if (!conn) {
+			_leave(" = -ENOMEM");
+			return -ENOMEM;
+		}
+
+		conn->trans = trans;
+		conn->bundle = NULL;
+		conn->service_id = service_id;
+		conn->epoch = rxrpc_epoch;
+		conn->in_clientflag = 0;
+		conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+		conn->cid = 0;
+		conn->state = RXRPC_CONN_CLIENT;
+		conn->avail_calls = RXRPC_MAXCALLS - 1;
+		conn->security_level = rx->min_sec_level;
+		conn->key = key_get(rx->key);
+
+		ret = rxrpc_init_client_conn_security(conn);
+		if (ret < 0) {
+			key_put(conn->key);
+			kfree(conn);
+			_leave(" = %d [key]", ret);
+			return ret;
+		}
+
+		write_lock_bh(&rxrpc_connection_lock);
+		list_add_tail(&conn->link, &rxrpc_connections);
+		write_unlock_bh(&rxrpc_connection_lock);
+
+		spin_lock(&trans->client_lock);
+		atomic_inc(&trans->usage);
+
+		_net("CONNECT EXCL new %d on TRANS %d",
+		     conn->debug_id, conn->trans->debug_id);
+
+		rxrpc_assign_connection_id(conn);
+		rx->conn = conn;
+	} else {
+		/* the channel search below runs under, and then releases, the
+		 * client lock, so take it on the reuse path as well */
+		spin_lock(&trans->client_lock);
+	}
+
+	/* we've got a connection with a free channel and we can now attach the
+	 * call to it
+	 * - we're holding the transport's client lock
+	 * - we're holding a reference on the connection
+	 */
+	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+		if (!conn->channels[chan])
+			goto found_channel;
+	goto no_free_channels;
+
+found_channel:
+	atomic_inc(&conn->usage);
+	conn->channels[chan] = call;
+	call->conn = conn;
+	call->channel = chan;
+	call->cid = conn->cid | htonl(chan);
+	call->call_id = htonl(++conn->call_counter);
+
+	_net("CONNECT client on conn %d chan %d as call %x",
+	     conn->debug_id, chan, ntohl(call->call_id));
+
+	spin_unlock(&trans->client_lock);
+
+	rxrpc_add_call_ID_to_conn(conn, call);
+	_leave(" = 0");
+	return 0;
+
+no_free_channels:
+	spin_unlock(&trans->client_lock);
+	_leave(" = -ENOSR");
+	return -ENOSR;
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ * - sockets flagged RXRPC_SOCK_EXCLUSIVE_CONN are handed off to
+ *   rxrpc_connect_exclusive()
+ * - otherwise a connection with a spare channel is taken from the bundle's
+ *   avail list, then from its unused list; failing both, a new connection is
+ *   created and parked on the unused list and the search is repeated
+ * - a bundle is limited to 20 connections; at the limit the caller sleeps on
+ *   bundle->chanwait if gfp permits waiting
+ * - returns 0 on success, -EAGAIN if the bundle is full and waiting is not
+ *   allowed, -ERESTARTSYS if a signal is pending, -ENOMEM if a connection
+ *   could not be allocated, or the error from
+ *   rxrpc_init_client_conn_security()
+ */
+int rxrpc_connect_call(struct rxrpc_sock *rx,
+		       struct rxrpc_transport *trans,
+		       struct rxrpc_conn_bundle *bundle,
+		       struct rxrpc_call *call,
+		       gfp_t gfp)
+{
+	struct rxrpc_connection *conn, *candidate;
+	int chan, ret;
+
+	DECLARE_WAITQUEUE(myself, current);
+
+	_enter("%p,%lx,", rx, call->user_call_ID);
+
+	if (test_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags))
+		return rxrpc_connect_exclusive(rx, trans, bundle->service_id,
+					       call, gfp);
+
+	spin_lock(&trans->client_lock);
+	for (;;) {
+		/* see if the bundle has a call slot available */
+		if (!list_empty(&bundle->avail_conns)) {
+			_debug("avail");
+			conn = list_entry(bundle->avail_conns.next,
+					  struct rxrpc_connection,
+					  bundle_link);
+			/* taking the last free channel makes the conn busy */
+			if (--conn->avail_calls == 0)
+				list_move(&conn->bundle_link,
+					  &bundle->busy_conns);
+			ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
+			ASSERT(conn->channels[0] == NULL ||
+			       conn->channels[1] == NULL ||
+			       conn->channels[2] == NULL ||
+			       conn->channels[3] == NULL);
+			atomic_inc(&conn->usage);
+			break;
+		}
+
+		if (!list_empty(&bundle->unused_conns)) {
+			_debug("unused");
+			conn = list_entry(bundle->unused_conns.next,
+					  struct rxrpc_connection,
+					  bundle_link);
+			ASSERTCMP(conn->avail_calls, ==, RXRPC_MAXCALLS);
+			conn->avail_calls = RXRPC_MAXCALLS - 1;
+			ASSERT(conn->channels[0] == NULL &&
+			       conn->channels[1] == NULL &&
+			       conn->channels[2] == NULL &&
+			       conn->channels[3] == NULL);
+			atomic_inc(&conn->usage);
+			list_move(&conn->bundle_link, &bundle->avail_conns);
+			break;
+		}
+
+		/* need to allocate a new connection */
+		_debug("get new conn [%d]", bundle->num_conns);
+
+		spin_unlock(&trans->client_lock);
+
+		if (signal_pending(current))
+			goto interrupted;
+
+		if (bundle->num_conns >= 20) {
+			_debug("too many conns");
+
+			if (!(gfp & __GFP_WAIT)) {
+				_leave(" = -EAGAIN");
+				return -EAGAIN;
+			}
+
+			/* sleep until a connection frees up or the bundle
+			 * drops below its limit */
+			add_wait_queue(&bundle->chanwait, &myself);
+			for (;;) {
+				set_current_state(TASK_INTERRUPTIBLE);
+				if (bundle->num_conns < 20 ||
+				    !list_empty(&bundle->unused_conns) ||
+				    !list_empty(&bundle->avail_conns))
+					break;
+				if (signal_pending(current))
+					goto interrupted_dequeue;
+				schedule();
+			}
+			remove_wait_queue(&bundle->chanwait, &myself);
+			__set_current_state(TASK_RUNNING);
+			spin_lock(&trans->client_lock);
+			continue;
+		}
+
+		/* not yet present - create a candidate for a new connection and then
+		 * redo the check */
+		candidate = rxrpc_alloc_connection(gfp);
+		/* the allocator reports failure with NULL, not an ERR_PTR */
+		if (!candidate) {
+			_leave(" = -ENOMEM");
+			return -ENOMEM;
+		}
+
+		candidate->trans = trans;
+		candidate->bundle = bundle;
+		candidate->service_id = bundle->service_id;
+		candidate->epoch = rxrpc_epoch;
+		candidate->in_clientflag = 0;
+		candidate->out_clientflag = RXRPC_CLIENT_INITIATED;
+		candidate->cid = 0;
+		candidate->state = RXRPC_CONN_CLIENT;
+		candidate->avail_calls = RXRPC_MAXCALLS;
+		candidate->security_level = rx->min_sec_level;
+		candidate->key = key_get(bundle->key);
+
+		ret = rxrpc_init_client_conn_security(candidate);
+		if (ret < 0) {
+			key_put(candidate->key);
+			kfree(candidate);
+			_leave(" = %d [key]", ret);
+			return ret;
+		}
+
+		write_lock_bh(&rxrpc_connection_lock);
+		list_add_tail(&candidate->link, &rxrpc_connections);
+		write_unlock_bh(&rxrpc_connection_lock);
+
+		spin_lock(&trans->client_lock);
+
+		list_add(&candidate->bundle_link, &bundle->unused_conns);
+		bundle->num_conns++;
+		atomic_inc(&bundle->usage);
+		atomic_inc(&trans->usage);
+
+		_net("CONNECT new %d on TRANS %d",
+		     candidate->debug_id, candidate->trans->debug_id);
+
+		rxrpc_assign_connection_id(candidate);
+		if (candidate->security)
+			candidate->security->prime_packet_security(candidate);
+
+		/* leave the candidate lurking in zombie mode attached to the
+		 * bundle until we're ready for it */
+		rxrpc_put_connection(candidate);
+		candidate = NULL;
+	}
+
+	/* we've got a connection with a free channel and we can now attach the
+	 * call to it
+	 * - we're holding the transport's client lock
+	 * - we're holding a reference on the connection
+	 * - we're holding a reference on the bundle
+	 */
+	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+		if (!conn->channels[chan])
+			goto found_channel;
+	ASSERT(conn->channels[0] == NULL ||
+	       conn->channels[1] == NULL ||
+	       conn->channels[2] == NULL ||
+	       conn->channels[3] == NULL);
+	BUG();
+
+found_channel:
+	conn->channels[chan] = call;
+	call->conn = conn;
+	call->channel = chan;
+	call->cid = conn->cid | htonl(chan);
+	call->call_id = htonl(++conn->call_counter);
+
+	_net("CONNECT client on conn %d chan %d as call %x",
+	     conn->debug_id, chan, ntohl(call->call_id));
+
+	ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
+	spin_unlock(&trans->client_lock);
+
+	rxrpc_add_call_ID_to_conn(conn, call);
+
+	_leave(" = 0");
+	return 0;
+
+interrupted_dequeue:
+	remove_wait_queue(&bundle->chanwait, &myself);
+	__set_current_state(TASK_RUNNING);
+interrupted:
+	_leave(" = -ERESTARTSYS");
+	return -ERESTARTSYS;
+}
+
+/*
+ * get a record of an incoming connection
+ * - looks the connection up in the transport's server_conns tree by the
+ *   epoch and connection ID taken from the packet header
+ * - if absent, allocates a candidate outside the lock and repeats the search
+ *   under the write lock before inserting it
+ * - returns the connection with its usage count raised (a new record keeps
+ *   the count of 1 set at allocation), ERR_PTR(-ENOMEM) if allocation fails
+ *   or ERR_PTR(-EKEYREJECTED) if an existing record's security index differs
+ *   from the packet's
+ */
+struct rxrpc_connection *
+rxrpc_incoming_connection(struct rxrpc_transport *trans,
+ struct rxrpc_header *hdr,
+ gfp_t gfp)
+{
+ struct rxrpc_connection *conn, *candidate = NULL;
+ struct rb_node *p, **pp;
+ const char *new = "old"; /* only used in the trace message */
+ __be32 epoch;
+ u32 conn_id;
+
+ _enter("");
+
+ ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED);
+
+ epoch = hdr->epoch;
+ conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
+
+ /* search the connection list first */
+ read_lock_bh(&trans->conn_lock);
+
+ p = trans->server_conns.rb_node;
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, node);
+
+ _debug("maybe %x", conn->real_conn_id);
+
+ if (epoch < conn->epoch)
+ p = p->rb_left;
+ else if (epoch > conn->epoch)
+ p = p->rb_right;
+ else if (conn_id < conn->real_conn_id)
+ p = p->rb_left;
+ else if (conn_id > conn->real_conn_id)
+ p = p->rb_right;
+ else
+ goto found_extant_connection;
+ }
+ read_unlock_bh(&trans->conn_lock);
+
+ /* not yet present - create a candidate for a new record and then
+ * redo the search */
+ candidate = rxrpc_alloc_connection(gfp);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ candidate->trans = trans;
+ candidate->epoch = hdr->epoch;
+ candidate->cid = hdr->cid & __constant_cpu_to_be32(RXRPC_CIDMASK);
+ candidate->service_id = hdr->serviceId;
+ candidate->security_ix = hdr->securityIndex;
+ candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
+ candidate->out_clientflag = 0;
+ candidate->real_conn_id = conn_id;
+ candidate->state = RXRPC_CONN_SERVER;
+ if (candidate->service_id) /* NOTE(review): keyed on service_id rather than security_ix - confirm this is intended */
+ candidate->state = RXRPC_CONN_SERVER_UNSECURED;
+
+ write_lock_bh(&trans->conn_lock);
+
+ pp = &trans->server_conns.rb_node;
+ p = NULL;
+ while (*pp) {
+ p = *pp;
+ conn = rb_entry(p, struct rxrpc_connection, node);
+
+ if (epoch < conn->epoch)
+ pp = &(*pp)->rb_left;
+ else if (epoch > conn->epoch)
+ pp = &(*pp)->rb_right;
+ else if (conn_id < conn->real_conn_id)
+ pp = &(*pp)->rb_left;
+ else if (conn_id > conn->real_conn_id)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_second; /* someone else inserted it while the lock was dropped */
+ }
+
+ /* we can now add the new candidate to the list */
+ conn = candidate;
+ candidate = NULL;
+ rb_link_node(&conn->node, p, pp);
+ rb_insert_color(&conn->node, &trans->server_conns);
+ atomic_inc(&conn->trans->usage); /* the connection pins its transport */
+
+ write_unlock_bh(&trans->conn_lock);
+
+ write_lock_bh(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock_bh(&rxrpc_connection_lock);
+
+ new = "new";
+
+success:
+ _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->real_conn_id);
+
+ _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+ return conn;
+
+ /* we found the connection in the list immediately */
+found_extant_connection:
+ if (hdr->securityIndex != conn->security_ix) {
+ read_unlock_bh(&trans->conn_lock);
+ goto security_mismatch;
+ }
+ atomic_inc(&conn->usage);
+ read_unlock_bh(&trans->conn_lock);
+ goto success;
+
+ /* we found the connection on the second time through the list */
+found_extant_second:
+ if (hdr->securityIndex != conn->security_ix) {
+ write_unlock_bh(&trans->conn_lock);
+ goto security_mismatch;
+ }
+ atomic_inc(&conn->usage);
+ write_unlock_bh(&trans->conn_lock);
+ kfree(candidate); /* discard the unused candidate */
+ goto success;
+
+security_mismatch:
+ kfree(candidate); /* NULL on the first-search path, where kfree() does nothing */
+ _leave(" = -EKEYREJECTED");
+ return ERR_PTR(-EKEYREJECTED);
+}
+
+/*
+ * look up the connection an incoming packet belongs to
+ * - packets flagged as client-initiated are looked up in the transport's
+ *   server connection tree, all others in its client connection tree
+ * - both trees are searched by epoch, then by connection ID
+ * - returns the connection with its usage count raised, or NULL if there is
+ *   no match
+ */
+struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
+					       struct rxrpc_header *hdr)
+{
+	struct rxrpc_connection *conn;
+	struct rb_node *node;
+	__be32 epoch;
+	u32 conn_id;
+
+	_enter(",{%x,%x}", ntohl(hdr->cid), hdr->flags);
+
+	read_lock_bh(&trans->conn_lock);
+
+	epoch = hdr->epoch;
+	conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
+
+	node = (hdr->flags & RXRPC_CLIENT_INITIATED) ?
+		trans->server_conns.rb_node : trans->client_conns.rb_node;
+
+	while (node) {
+		conn = rb_entry(node, struct rxrpc_connection, node);
+
+		_debug("maybe %x", conn->real_conn_id);
+
+		if (epoch != conn->epoch) {
+			node = epoch < conn->epoch ?
+				node->rb_left : node->rb_right;
+			continue;
+		}
+
+		if (conn_id != conn->real_conn_id) {
+			node = conn_id < conn->real_conn_id ?
+				node->rb_left : node->rb_right;
+			continue;
+		}
+
+		/* exact match on both keys */
+		atomic_inc(&conn->usage);
+		read_unlock_bh(&trans->conn_lock);
+		_leave(" = %p", conn);
+		return conn;
+	}
+
+	read_unlock_bh(&trans->conn_lock);
+	_leave(" = NULL");
+	return NULL;
+}
+
+/*
+ * drop a reference on a virtual connection
+ * - the time of the put is recorded in conn->put_time
+ * - when the count reaches zero the record is not freed here; the reaper work
+ *   item is queued to run immediately instead
+ */
+void rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+	_enter("%p{u=%d,d=%d}",
+	       conn, atomic_read(&conn->usage), conn->debug_id);
+
+	ASSERTCMP(atomic_read(&conn->usage), >, 0);
+
+	conn->put_time = xtime.tv_sec;
+	if (!atomic_dec_and_test(&conn->usage))
+		goto out;
+
+	_debug("zombie");
+	rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+
+out:
+	_leave("");
+}
+
+/*
+ * destroy a virtual connection
+ * - the usage count must already be zero and the call tree empty
+ * - drops the connection's references on its bundle (if any) and on its
+ *   transport, purges the Rx queue, calls rxrpc_clear_conn_security() and
+ *   frees the record
+ */
+static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
+{
+ _enter("%p{%d}", conn, atomic_read(&conn->usage));
+
+ ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+
+ _net("DESTROY CONN %d", conn->debug_id);
+
+ if (conn->bundle) /* NULL for exclusive and incoming connections */
+ rxrpc_put_bundle(conn->trans, conn->bundle);
+
+ ASSERT(RB_EMPTY_ROOT(&conn->calls));
+ rxrpc_purge_queue(&conn->rx_queue);
+
+ rxrpc_clear_conn_security(conn);
+ rxrpc_put_transport(conn->trans);
+ kfree(conn);
+ _leave("");
+}
+
+/*
+ * reap dead connections
+ * - walks rxrpc_connections looking for records whose usage count is zero
+ * - a record whose put_time + rxrpc_connection_timeout has passed is unlinked
+ *   from its transport's lookup tree (and from its bundle, if any) and moved
+ *   to a local graveyard list; it is destroyed after the list lock is dropped
+ * - otherwise the soonest pending expiry is remembered and the work item is
+ *   requeued for that time
+ */
+void rxrpc_connection_reaper(struct work_struct *work)
+{
+ struct rxrpc_connection *conn, *_p;
+ unsigned long now, earliest, reap_time;
+
+ LIST_HEAD(graveyard);
+
+ _enter("");
+
+ now = xtime.tv_sec;
+ earliest = ULONG_MAX; /* ULONG_MAX means "nothing pending" */
+
+ write_lock_bh(&rxrpc_connection_lock);
+ list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+ _debug("reap CONN %d { u=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->usage),
+ (long) now - (long) conn->put_time);
+
+ if (likely(atomic_read(&conn->usage) > 0)) /* still in use */
+ continue;
+
+ spin_lock(&conn->trans->client_lock);
+ write_lock(&conn->trans->conn_lock);
+ reap_time = conn->put_time + rxrpc_connection_timeout;
+
+ if (atomic_read(&conn->usage) > 0) { /* recheck now the transport locks are held */
+ ;
+ } else if (reap_time <= now) {
+ list_move_tail(&conn->link, &graveyard);
+ if (conn->out_clientflag) /* set on client connections, zero on incoming ones */
+ rb_erase(&conn->node,
+ &conn->trans->client_conns);
+ else
+ rb_erase(&conn->node,
+ &conn->trans->server_conns);
+ if (conn->bundle) {
+ list_del_init(&conn->bundle_link);
+ conn->bundle->num_conns--;
+ }
+
+ } else if (reap_time < earliest) {
+ earliest = reap_time;
+ }
+
+ write_unlock(&conn->trans->conn_lock);
+ spin_unlock(&conn->trans->client_lock);
+ }
+ write_unlock_bh(&rxrpc_connection_lock);
+
+ if (earliest != ULONG_MAX) {
+ _debug("reschedule reaper %ld", (long) earliest - now);
+ ASSERTCMP(earliest, >, now);
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap,
+ (earliest - now) * HZ); /* seconds to jiffies */
+ }
+
+ /* then destroy all those pulled out */
+ while (!list_empty(&graveyard)) {
+ conn = list_entry(graveyard.next, struct rxrpc_connection,
+ link);
+ list_del_init(&conn->link);
+
+ ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+ rxrpc_destroy_connection(conn);
+ }
+
+ _leave("");
+}
+
+/*
+ * preemptively destroy all the connection records rather than waiting for them
+ * to time out
+ * - zeroes the reap timeout, cancels any pending reaper run and queues the
+ *   reaper to run immediately
+ * - NOTE(review): nothing here waits for the reaper to finish - presumably
+ *   the caller flushes the workqueue afterwards; confirm
+ */
+void __exit rxrpc_destroy_all_connections(void)
+{
+ _enter("");
+
+ rxrpc_connection_timeout = 0; /* every unused connection is now past its reap time */
+ cancel_delayed_work(&rxrpc_connection_reap);
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+
+ _leave("");
+}
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
new file mode 100644
index 000000000000..1ada43d51165
--- /dev/null
+++ b/net/rxrpc/ar-connevent.c
@@ -0,0 +1,403 @@
+/* connection-level event handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * propagate a connection-level abort to every call on the connection
+ * - state is the call state to impose and abort_code the code to record
+ * - each call whose state is no later than RXRPC_CALL_COMPLETE is switched to
+ *   the new state, flagged with CONN_ABORT (for a local abort) or RCVD_ABORT
+ *   (otherwise) and queued for processing
+ */
+static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
+			      u32 abort_code)
+{
+	struct rxrpc_call *call;
+	struct rb_node *node;
+	int event;
+
+	_enter("{%d},%x", conn->debug_id, abort_code);
+
+	/* the event to raise depends only on the requested state */
+	if (state == RXRPC_CALL_LOCALLY_ABORTED)
+		event = RXRPC_CALL_CONN_ABORT;
+	else
+		event = RXRPC_CALL_RCVD_ABORT;
+
+	read_lock_bh(&conn->lock);
+
+	node = rb_first(&conn->calls);
+	while (node) {
+		call = rb_entry(node, struct rxrpc_call, conn_node);
+
+		write_lock(&call->state_lock);
+		if (call->state <= RXRPC_CALL_COMPLETE) {
+			call->state = state;
+			call->abort_code = abort_code;
+			set_bit(event, &call->events);
+			rxrpc_queue_call(call);
+		}
+		write_unlock(&call->state_lock);
+
+		node = rb_next(node);
+	}
+
+	read_unlock_bh(&conn->lock);
+	_leave("");
+}
+
+/*
+ * abort a connection locally and tell the peer
+ * - does nothing and returns 0 if the connection has already been aborted
+ * - otherwise marks the connection locally aborted with the given error,
+ *   aborts all its calls and transmits an ABORT packet carrying abort_code
+ * - returns 0 on success or -EAGAIN if the packet could not be sent
+ */
+static int rxrpc_abort_connection(struct rxrpc_connection *conn,
+				  u32 error, u32 abort_code)
+{
+	struct rxrpc_header hdr;
+	struct msghdr msg;
+	struct kvec iov[2];
+	__be32 word;
+	size_t len;
+	int ret;
+
+	_enter("%d,,%u,%u", conn->debug_id, error, abort_code);
+
+	/* generate a connection-level abort */
+	spin_lock_bh(&conn->state_lock);
+	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
+		spin_unlock_bh(&conn->state_lock);
+		_leave(" = 0 [already dead]");
+		return 0;
+	}
+	conn->state = RXRPC_CONN_LOCALLY_ABORTED;
+	conn->error = error;
+	spin_unlock_bh(&conn->state_lock);
+
+	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code);
+
+	/* build the abort packet: header followed by the abort code */
+	hdr.epoch = conn->epoch;
+	hdr.cid = conn->cid;
+	hdr.callNumber = 0;
+	hdr.seq = 0;
+	hdr.type = RXRPC_PACKET_TYPE_ABORT;
+	hdr.flags = conn->out_clientflag;
+	hdr.userStatus = 0;
+	hdr.securityIndex = conn->security_ix;
+	hdr._rsvd = 0;
+	hdr.serviceId = conn->service_id;
+
+	word = htonl(abort_code);
+
+	iov[0].iov_base = &hdr;
+	iov[0].iov_len = sizeof(hdr);
+	iov[1].iov_base = &word;
+	iov[1].iov_len = sizeof(word);
+	len = sizeof(hdr) + sizeof(word);
+
+	/* address it to the transport's peer */
+	msg.msg_name = &conn->trans->peer->srx.transport.sin;
+	msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
+
+	hdr.serial = htonl(atomic_inc_return(&conn->serial));
+	_proto("Tx CONN ABORT %%%u { %d }", ntohl(hdr.serial), abort_code);
+
+	ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
+	if (ret >= 0) {
+		_leave(" = 0");
+		return 0;
+	}
+
+	_debug("sendmsg failed: %d", ret);
+	return -EAGAIN;
+}
+
+/*
+ * note that the channel a call is on has now been secured
+ * - must be called with softirqs disabled
+ * - a NULL call is ignored
+ * - raises the SECURED event and queues the call, unless the call has
+ *   completed or the event was already pending
+ */
+void rxrpc_call_is_secure(struct rxrpc_call *call)
+{
+	_enter("%p", call);
+
+	if (!call)
+		return;
+
+	read_lock(&call->state_lock);
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		if (!test_and_set_bit(RXRPC_CALL_SECURED, &call->events))
+			rxrpc_queue_call(call);
+	}
+	read_unlock(&call->state_lock);
+}
+
+/*
+ * connection-level Rx packet processor
+ * - handles ABORT, CHALLENGE and RESPONSE packets; any other type is -EPROTO
+ * - returns 0 on success, -ECONNABORTED if the connection is (or has just
+ *   been) aborted, -EPROTO for a malformed or unexpected packet, or a
+ *   negative error from the connection's security operations
+ * - _abort_code is passed through to the security operations
+ */
+static int rxrpc_process_event(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ __be32 tmp;
+ u32 serial;
+ int loop, ret;
+
+ if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) /* already aborted: nothing more to process */
+ return -ECONNABORTED;
+
+ serial = ntohl(sp->hdr.serial);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_ABORT:
+ if (skb_copy_bits(skb, 0, &tmp, sizeof(tmp)) < 0) /* the abort code is the first word of the payload */
+ return -EPROTO;
+ _proto("Rx ABORT %%%u { ac=%d }", serial, ntohl(tmp));
+
+ conn->state = RXRPC_CONN_REMOTELY_ABORTED;
+ rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
+ ntohl(tmp));
+ return -ECONNABORTED;
+
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ if (conn->security)
+ return conn->security->respond_to_challenge(
+ conn, skb, _abort_code);
+ return -EPROTO;
+
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ if (!conn->security)
+ return -EPROTO;
+
+ ret = conn->security->verify_response(conn, skb, _abort_code);
+ if (ret < 0)
+ return ret;
+
+ ret = conn->security->init_connection_security(conn);
+ if (ret < 0)
+ return ret;
+
+ conn->security->prime_packet_security(conn);
+ read_lock_bh(&conn->lock);
+ spin_lock(&conn->state_lock);
+
+ if (conn->state == RXRPC_CONN_SERVER_CHALLENGING) {
+ conn->state = RXRPC_CONN_SERVER;
+ for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+ rxrpc_call_is_secure(conn->channels[loop]); /* empty channels are NULL and are skipped by the callee */
+ }
+
+ spin_unlock(&conn->state_lock);
+ read_unlock_bh(&conn->lock);
+ return 0;
+
+ default:
+ return -EPROTO;
+ }
+}
+
+/*
+ * initialise security on a server connection and challenge the peer
+ * - if the connection has no key yet, server-side security is set up first
+ * - any failure aborts the connection: RX_CALL_DEAD if setup returned
+ *   -ENOENT or the challenge could not be issued, RXKADNOAUTH for any other
+ *   setup error
+ */
+static void rxrpc_secure_connection(struct rxrpc_connection *conn)
+{
+	u32 abort_code;
+	int ret;
+
+	_enter("{%d}", conn->debug_id);
+
+	ASSERT(conn->security_ix != 0);
+
+	if (!conn->key) {
+		_debug("set up security");
+		ret = rxrpc_init_server_conn_security(conn);
+		if (ret == -ENOENT) {
+			abort_code = RX_CALL_DEAD;
+			goto abort;
+		}
+		if (ret != 0) {
+			abort_code = RXKADNOAUTH;
+			goto abort;
+		}
+	}
+
+	ASSERT(conn->security != NULL);
+
+	ret = -ENOMEM;
+	abort_code = RX_CALL_DEAD;
+	if (conn->security->issue_challenge(conn) < 0)
+		goto abort;
+
+	_leave("");
+	return;
+
+abort:
+	_debug("abort %d, %d", ret, abort_code);
+	rxrpc_abort_connection(conn, -ret, abort_code);
+	_leave(" [aborted]");
+}
+
+/*
+ * connection-level event processor
+ * - work item handler for conn->processor
+ * - takes its own reference on the connection for the duration
+ * - services a pending RXRPC_CONN_CHALLENGE event, then drains conn->rx_queue
+ * - -EAGAIN from a packet puts it back at the head of the queue and stops;
+ *   -EPROTO, -EKEYEXPIRED and -EKEYREJECTED abort the connection
+ */
+void rxrpc_process_connection(struct work_struct *work)
+{
+ struct rxrpc_connection *conn =
+ container_of(work, struct rxrpc_connection, processor);
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ u32 abort_code = RX_PROTOCOL_ERROR; /* default, unless the security code sets another */
+ int ret;
+
+ _enter("{%d}", conn->debug_id);
+
+ atomic_inc(&conn->usage); /* dropped again at "out" */
+
+ if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
+ rxrpc_secure_connection(conn);
+ rxrpc_put_connection(conn); /* presumably the ref taken by whoever raised the event - confirm */
+ }
+
+ /* go through the conn-level event packets, releasing the ref on this
+ * connection that each one has when we've finished with it */
+ while ((skb = skb_dequeue(&conn->rx_queue))) {
+ sp = rxrpc_skb(skb);
+
+ ret = rxrpc_process_event(conn, skb, &abort_code);
+ switch (ret) {
+ case -EPROTO:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ goto protocol_error;
+ case -EAGAIN:
+ goto requeue_and_leave;
+ case -ECONNABORTED:
+ default:
+ rxrpc_put_connection(conn); /* the packet's ref */
+ rxrpc_free_skb(skb);
+ break;
+ }
+ }
+
+out:
+ rxrpc_put_connection(conn);
+ _leave("");
+ return;
+
+requeue_and_leave:
+ skb_queue_head(&conn->rx_queue, skb); /* the packet keeps its ref while queued */
+ goto out;
+
+protocol_error:
+ if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) /* abort could not be sent: retry later */
+ goto requeue_and_leave;
+ rxrpc_put_connection(conn); /* the packet's ref */
+ rxrpc_free_skb(skb);
+ _leave(" [EPROTO]");
+ goto out;
+}
+
+/*
+ * put a packet up for transport-level abort
+ * - takes a reference on the local endpoint on behalf of the queued packet;
+ *   rxrpc_reject_packets() drops it once the packet has been dealt with
+ * - BUGs if the endpoint's usage count has already reached zero
+ */
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+{
+ CHECK_SLAB_OKAY(&local->usage);
+
+ if (!atomic_inc_not_zero(&local->usage)) { /* fails only if the count was already zero */
+ printk("resurrected on reject\n");
+ BUG();
+ }
+
+ skb_queue_tail(&local->reject_queue, skb);
+ rxrpc_queue_work(&local->rejecter);
+}
+
+/*
+ * reject packets through the local endpoint
+ * - work item handler for local->rejecter
+ * - for each packet on the reject queue, sends an ABORT packet back to the
+ *   packet's source address and port, carrying skb->priority as the abort
+ *   code
+ * - only AF_INET endpoints send anything; packets on other families are
+ *   just freed
+ */
+void rxrpc_reject_packets(struct work_struct *work)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ } sa;
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_header hdr;
+ struct rxrpc_local *local;
+ struct sk_buff *skb;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t size;
+ __be32 code;
+
+ local = container_of(work, struct rxrpc_local, rejecter);
+ rxrpc_get_local(local); /* held while the queue is drained; dropped before returning */
+
+ _enter("%d", local->debug_id);
+
+ iov[0].iov_base = &hdr;
+ iov[0].iov_len = sizeof(hdr);
+ iov[1].iov_base = &code;
+ iov[1].iov_len = sizeof(code);
+ size = sizeof(hdr) + sizeof(code);
+
+ msg.msg_name = &sa;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa.sa_family = local->srx.transport.family;
+ switch (sa.sa.sa_family) {
+ case AF_INET:
+ msg.msg_namelen = sizeof(sa.sin);
+ break;
+ default:
+ msg.msg_namelen = 0;
+ break;
+ }
+
+ memset(&hdr, 0, sizeof(hdr)); /* fields not set per packet below stay zero */
+ hdr.type = RXRPC_PACKET_TYPE_ABORT;
+
+ while ((skb = skb_dequeue(&local->reject_queue))) {
+ sp = rxrpc_skb(skb);
+ switch (sa.sa.sa_family) {
+ case AF_INET:
+ sa.sin.sin_port = udp_hdr(skb)->source;
+ sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ code = htonl(skb->priority);
+
+ hdr.epoch = sp->hdr.epoch;
+ hdr.cid = sp->hdr.cid;
+ hdr.callNumber = sp->hdr.callNumber;
+ hdr.serviceId = sp->hdr.serviceId;
+ hdr.flags = sp->hdr.flags;
+ hdr.flags ^= RXRPC_CLIENT_INITIATED; /* invert the client-initiated bit for the reply */
+ hdr.flags &= RXRPC_CLIENT_INITIATED; /* and clear every other flag */
+
+ kernel_sendmsg(local->socket, &msg, iov, 2, size); /* result is ignored */
+ break;
+
+ default:
+ break;
+ }
+
+ rxrpc_free_skb(skb);
+ rxrpc_put_local(local); /* the ref rxrpc_reject_packet() took for this packet */
+ }
+
+ rxrpc_put_local(local);
+ _leave("");
+}
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
new file mode 100644
index 000000000000..6cb3e8890e7e
--- /dev/null
+++ b/net/rxrpc/ar-error.c
@@ -0,0 +1,255 @@
+/* Error message handling (ICMP)
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * handle an error received on the local endpoint
+ * - pulls one packet off the UDP socket's error queue, works out which peer
+ *   and transport it relates to and hands it to the transport's error handler
+ *   work item
+ * - ICMP "fragmentation needed" reports are additionally used here to wind
+ *   down the peer's MTU
+ * - if further errors remain queued on the socket, sk_err is regenerated from
+ *   the next one and the socket's error report hook is invoked again
+ */
+void rxrpc_UDP_error_report(struct sock *sk)
+{
+ struct sock_exterr_skb *serr;
+ struct rxrpc_transport *trans;
+ struct rxrpc_local *local = sk->sk_user_data;
+ struct rxrpc_peer *peer;
+ struct sk_buff *skb;
+ __be32 addr;
+ __be16 port;
+
+ _enter("%p{%d}", sk, local->debug_id);
+
+ skb = skb_dequeue(&sk->sk_error_queue);
+ if (!skb) {
+ _leave("UDP socket errqueue empty");
+ return;
+ }
+
+ rxrpc_new_skb(skb);
+
+ /* dig the address and port the error relates to out of the report */
+ serr = SKB_EXT_ERR(skb);
+ addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
+ port = serr->port;
+
+ _net("Rx UDP Error from "NIPQUAD_FMT":%hu",
+ NIPQUAD(addr), ntohs(port));
+ _debug("Msg l:%d d:%d", skb->len, skb->data_len);
+
+ peer = rxrpc_find_peer(local, addr, port);
+ if (IS_ERR(peer)) {
+ rxrpc_free_skb(skb);
+ _leave(" [no peer]");
+ return;
+ }
+
+ trans = rxrpc_find_transport(local, peer);
+ if (!trans) {
+ rxrpc_put_peer(peer);
+ rxrpc_free_skb(skb);
+ _leave(" [no trans]");
+ return;
+ }
+
+ if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
+ serr->ee.ee_type == ICMP_DEST_UNREACH &&
+ serr->ee.ee_code == ICMP_FRAG_NEEDED
+ ) {
+ u32 mtu = serr->ee.ee_info;
+
+ _net("Rx Received ICMP Fragmentation Needed (%d)", mtu);
+
+ /* wind down the local interface MTU */
+ if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
+ peer->if_mtu = mtu;
+ _net("I/F MTU %u", mtu);
+ }
+
+ /* ip_rt_frag_needed() may have eaten the info */
+ if (mtu == 0)
+ mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);
+
+ if (mtu == 0) {
+ /* they didn't give us a size, estimate one, starting from
+ * the interface MTU we currently believe in (starting from
+ * the zero in mtu would make the estimate meaningless) */
+ mtu = peer->if_mtu;
+ if (mtu > 1500) {
+ mtu >>= 1;
+ if (mtu < 1500)
+ mtu = 1500;
+ } else if (mtu < 100 + peer->hdrsize) {
+ /* knocking 100 off would drop below the header size (or
+ * wrap the unsigned value), so use the floor instead */
+ mtu = peer->hdrsize + 4;
+ } else {
+ mtu -= 100;
+ }
+ }
+
+ /* only ever reduce the peer's MTU here, never raise it */
+ if (mtu < peer->mtu) {
+ spin_lock_bh(&peer->lock);
+ peer->mtu = mtu;
+ peer->maxdata = peer->mtu - peer->hdrsize;
+ spin_unlock_bh(&peer->lock);
+ _net("Net MTU %u (maxdata %u)",
+ peer->mtu, peer->maxdata);
+ }
+ }
+
+ rxrpc_put_peer(peer);
+
+ /* pass the transport ref to error_handler to release */
+ skb_queue_tail(&trans->error_queue, skb);
+ rxrpc_queue_work(&trans->error_handler);
+
+ /* reset and regenerate socket error */
+ spin_lock_bh(&sk->sk_error_queue.lock);
+ sk->sk_err = 0;
+ skb = skb_peek(&sk->sk_error_queue);
+ if (skb) {
+ sk->sk_err = SKB_EXT_ERR(skb)->ee.ee_errno;
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+ sk->sk_error_report(sk);
+ } else {
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+ }
+
+ _leave("");
+}
+
+/*
+ * deal with UDP error messages
+ * - work item handler for a transport: takes ONE packet off
+ *   trans->error_queue per invocation and requeues itself if more remain
+ * - translates the extended error into an errno and, if that is non-zero,
+ *   marks every call on the peer's error_targets list as having suffered a
+ *   network error
+ * - frees the packet and puts one transport ref before returning
+ */
+void rxrpc_UDP_error_handler(struct work_struct *work)
+{
+ struct sock_extended_err *ee;
+ struct sock_exterr_skb *serr;
+ struct rxrpc_transport *trans =
+ container_of(work, struct rxrpc_transport, error_handler);
+ struct sk_buff *skb;
+ int local, err;
+
+ _enter("");
+
+ skb = skb_dequeue(&trans->error_queue);
+ if (!skb)
+ return;
+
+ serr = SKB_EXT_ERR(skb);
+ ee = &serr->ee;
+
+ _net("Rx Error o=%d t=%d c=%d e=%d",
+ ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
+
+ /* start from the reported errno; specific ICMP codes override it below */
+ err = ee->ee_errno;
+
+ /* NOTE(review): 'local' is assigned in every branch below but is never
+ * read in this function */
+ switch (ee->ee_origin) {
+ case SO_EE_ORIGIN_ICMP:
+ local = 0;
+ switch (ee->ee_type) {
+ case ICMP_DEST_UNREACH:
+ switch (ee->ee_code) {
+ case ICMP_NET_UNREACH:
+ _net("Rx Received ICMP Network Unreachable");
+ err = ENETUNREACH;
+ break;
+ case ICMP_HOST_UNREACH:
+ _net("Rx Received ICMP Host Unreachable");
+ err = EHOSTUNREACH;
+ break;
+ case ICMP_PORT_UNREACH:
+ _net("Rx Received ICMP Port Unreachable");
+ err = ECONNREFUSED;
+ break;
+ case ICMP_FRAG_NEEDED:
+ _net("Rx Received ICMP Fragmentation Needed (%d)",
+ ee->ee_info);
+ err = 0; /* dealt with elsewhere */
+ break;
+ case ICMP_NET_UNKNOWN:
+ _net("Rx Received ICMP Unknown Network");
+ err = ENETUNREACH;
+ break;
+ case ICMP_HOST_UNKNOWN:
+ _net("Rx Received ICMP Unknown Host");
+ err = EHOSTUNREACH;
+ break;
+ default:
+ _net("Rx Received ICMP DestUnreach code=%u",
+ ee->ee_code);
+ break;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ _net("Rx Received ICMP TTL Exceeded");
+ break;
+
+ default:
+ _proto("Rx Received ICMP error { type=%u code=%u }",
+ ee->ee_type, ee->ee_code);
+ break;
+ }
+ break;
+
+ case SO_EE_ORIGIN_LOCAL:
+ _proto("Rx Received local error { error=%d }",
+ ee->ee_errno);
+ local = 1;
+ break;
+
+ case SO_EE_ORIGIN_NONE:
+ case SO_EE_ORIGIN_ICMP6:
+ default:
+ _proto("Rx Received error report { orig=%u }",
+ ee->ee_origin);
+ local = 0;
+ break;
+ }
+
+ /* terminate all the affected calls if there's an unrecoverable
+ * error */
+ if (err) {
+ struct rxrpc_call *call, *_n;
+
+ _debug("ISSUE ERROR %d", err);
+
+ spin_lock_bh(&trans->peer->lock);
+ trans->peer->net_error = err;
+
+ /* the _safe iterator is needed as each call is unlinked as we go */
+ list_for_each_entry_safe(call, _n, &trans->peer->error_targets,
+ error_link) {
+ write_lock(&call->state_lock);
+ /* only calls that have not yet completed or failed are moved
+ * to the network error state and kicked */
+ if (call->state != RXRPC_CALL_COMPLETE &&
+ call->state < RXRPC_CALL_NETWORK_ERROR) {
+ call->state = RXRPC_CALL_NETWORK_ERROR;
+ set_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
+ rxrpc_queue_call(call);
+ }
+ write_unlock(&call->state_lock);
+ list_del_init(&call->error_link);
+ }
+
+ spin_unlock_bh(&trans->peer->lock);
+ }
+
+ /* one packet per pass: go round again if there are more queued */
+ if (!skb_queue_empty(&trans->error_queue))
+ rxrpc_queue_work(&trans->error_handler);
+
+ rxrpc_free_skb(skb);
+ rxrpc_put_transport(trans);
+ _leave("");
+}
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
new file mode 100644
index 000000000000..91b5bbb003e2
--- /dev/null
+++ b/net/rxrpc/ar-input.c
@@ -0,0 +1,797 @@
+/* RxRPC packet reception
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/* NOTE(review): not referenced in the part of this file shown here; the unit
+ * (seconds, presumably) should be confirmed against its users */
+unsigned long rxrpc_ack_timeout = 1;
+
+/*
+ * printable names for packet types, indexed by the type field of the packet
+ * header (used in debugging messages); entry 0 and entries 9-15 are
+ * placeholders
+ */
+const char *rxrpc_pkts[] = {
+ "?00",
+ "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
+ "?09", "?10", "?11", "?12", "?13", "?14", "?15"
+};
+
+/*
+ * queue a packet for recvmsg to pass to userspace
+ * - the caller must hold a lock on call->lock
+ * - must not be called with interrupts disabled (sk_filter() disables BH's)
+ * - eats the packet whether successful or not
+ * - there must be just one reference to the packet, which the caller passes to
+ * this function
+ * - 'force' skips the socket filter; 'terminal' marks this as the last
+ *   message that will be posted for the call
+ * - returns 0, or the negative value returned by sk_filter()
+ */
+int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
+ bool force, bool terminal)
+{
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_sock *rx = call->socket;
+ struct sock *sk;
+ int skb_len, ret;
+
+ _enter(",,%d,%d", force, terminal);
+
+ ASSERT(!irqs_disabled());
+
+ sp = rxrpc_skb(skb);
+ ASSERTCMP(sp->call, ==, call);
+
+ /* if we've already posted the terminal message for a call, then we
+ * don't post any more */
+ if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
+ _debug("already terminated");
+ ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
+ /* detach the packet from the call before freeing it */
+ skb->destructor = NULL;
+ sp->call = NULL;
+ rxrpc_put_call(call);
+ rxrpc_free_skb(skb);
+ return 0;
+ }
+
+ sk = &rx->sk;
+
+ if (!force) {
+ /* cast skb->rcvbuf to unsigned... It's pointless, but
+ * reduces number of warnings when compiling with -W
+ * --ANK */
+// ret = -ENOBUFS;
+// if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+// (unsigned) sk->sk_rcvbuf)
+// goto out;
+
+ ret = sk_filter(sk, skb);
+ if (ret < 0)
+ goto out;
+ }
+
+ /* recheck under the queue lock that the packet may still be posted */
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) &&
+ !test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ call->socket->sk.sk_state != RXRPC_CLOSE) {
+ /* charge the packet to the socket */
+ skb->destructor = rxrpc_packet_destructor;
+ skb->dev = NULL;
+ skb->sk = sk;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+ if (terminal) {
+ _debug("<<<< TERMINAL MESSAGE >>>>");
+ set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags);
+ }
+
+ /* allow interception by a kernel service */
+ if (rx->interceptor) {
+ rx->interceptor(sk, call->user_call_ID, skb);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ } else {
+
+ /* Cache the SKB length before we tack it onto the
+ * receive queue. Once it is added it no longer
+ * belongs to us and may be freed by other threads of
+ * control pulling packets from the queue */
+ skb_len = skb->len;
+
+ _net("post skb %p", skb);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_data_ready(sk, skb_len);
+ }
+ /* the packet has been handed on, so it must not be freed below */
+ skb = NULL;
+ } else {
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ }
+ /* a packet dropped because the call/socket went away is not an error */
+ ret = 0;
+
+out:
+ /* release the socket buffer */
+ if (skb) {
+ skb->destructor = NULL;
+ sp->call = NULL;
+ rxrpc_put_call(call);
+ rxrpc_free_skb(skb);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * process a DATA packet, posting the packet to the appropriate queue
+ * - eats the packet if successful
+ * - returns 0 if the packet was posted, deferred to call->rx_queue or
+ *   discarded; returns -EBADMSG (packet NOT consumed) if data arrives after
+ *   the last packet has been seen
+ * - takes call->lock itself
+ */
+static int rxrpc_fast_process_data(struct rxrpc_call *call,
+ struct sk_buff *skb, u32 seq)
+{
+ struct rxrpc_skb_priv *sp;
+ bool terminal;
+ int ret, ackbit, ack;
+
+ _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
+
+ sp = rxrpc_skb(skb);
+ ASSERTCMP(sp->call, ==, NULL);
+
+ spin_lock(&call->lock);
+
+ if (call->state > RXRPC_CALL_COMPLETE)
+ goto discard;
+
+ ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post);
+ ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv);
+ ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten);
+
+ /* anything below the next-to-post sequence number has been seen before */
+ if (seq < call->rx_data_post) {
+ _debug("dup #%u [-%u]", seq, call->rx_data_post);
+ ack = RXRPC_ACK_DUPLICATE;
+ /* ret is not used on the discard path, which returns 0 */
+ ret = -ENOBUFS;
+ goto discard_and_ack;
+ }
+
+ /* we may already have the packet in the out of sequence queue */
+ ackbit = seq - (call->rx_data_eaten + 1);
+ ASSERTCMP(ackbit, >=, 0);
+ if (__test_and_set_bit(ackbit, call->ackr_window)) {
+ _debug("dup oos #%u [%u,%u]",
+ seq, call->rx_data_eaten, call->rx_data_post);
+ ack = RXRPC_ACK_DUPLICATE;
+ goto discard_and_ack;
+ }
+
+ /* NOTE(review): the window bit is set above before seq is checked
+ * against ackr_win_top - confirm ackbit cannot index beyond
+ * ackr_window for a far-ahead sequence number */
+ if (seq >= call->ackr_win_top) {
+ _debug("exceed #%u [%u]", seq, call->ackr_win_top);
+ __clear_bit(ackbit, call->ackr_window);
+ ack = RXRPC_ACK_EXCEEDS_WINDOW;
+ goto discard_and_ack;
+ }
+
+ if (seq == call->rx_data_expect) {
+ clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags);
+ call->rx_data_expect++;
+ } else if (seq > call->rx_data_expect) {
+ /* a gap has opened up: only ACK it if a gap was already flagged */
+ _debug("oos #%u [%u]", seq, call->rx_data_expect);
+ call->rx_data_expect = seq + 1;
+ if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) {
+ ack = RXRPC_ACK_OUT_OF_SEQUENCE;
+ goto enqueue_and_ack;
+ }
+ goto enqueue_packet;
+ }
+
+ /* only the next packet to be posted can take the fast path */
+ if (seq != call->rx_data_post) {
+ _debug("ahead #%u [%u]", seq, call->rx_data_post);
+ goto enqueue_packet;
+ }
+
+ if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags))
+ goto protocol_error;
+
+ /* if the packet need security things doing to it, then it goes down
+ * the slow path */
+ if (call->conn->security)
+ goto enqueue_packet;
+
+ /* attach the packet to the call (with a ref) and post it directly; the
+ * message is terminal if it's the last packet of a reply */
+ sp->call = call;
+ rxrpc_get_call(call);
+ terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
+ !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+ ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
+ if (ret < 0) {
+ /* NOTE(review): rxrpc_queue_rcv_skb() is documented as eating the
+ * packet whether successful or not, yet discard_and_ack reads
+ * sp->hdr.serial and frees skb again - confirm this path cannot
+ * touch or free an already-freed packet */
+ if (ret == -ENOMEM || ret == -ENOBUFS) {
+ __clear_bit(ackbit, call->ackr_window);
+ ack = RXRPC_ACK_NOSPACE;
+ goto discard_and_ack;
+ }
+ goto out;
+ }
+
+ skb = NULL;
+
+ _debug("post #%u", seq);
+ ASSERTCMP(call->rx_data_post, ==, seq);
+ call->rx_data_post++;
+
+ /* NOTE(review): sp points into the packet that has just been queued;
+ * the queuing function warns that a queued packet may be freed by
+ * whoever pulls it off the queue - confirm sp is still safe to read
+ * here and in the rxrpc_propose_ACK() call below */
+ if (sp->hdr.flags & RXRPC_LAST_PACKET)
+ set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
+
+ /* if we've reached an out of sequence packet then we need to drain
+ * that queue into the socket Rx queue now */
+ if (call->rx_data_post == call->rx_first_oos) {
+ _debug("drain rx oos now");
+ read_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ !test_and_set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock(&call->state_lock);
+ }
+
+ spin_unlock(&call->lock);
+ atomic_inc(&call->ackr_not_idle);
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false);
+ _leave(" = 0 [posted]");
+ return 0;
+
+protocol_error:
+ ret = -EBADMSG;
+out:
+ /* the packet is left for the caller to dispose of on this path */
+ spin_unlock(&call->lock);
+ _leave(" = %d", ret);
+ return ret;
+
+discard_and_ack:
+ _debug("discard and ACK packet %p", skb);
+ __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+discard:
+ spin_unlock(&call->lock);
+ rxrpc_free_skb(skb);
+ _leave(" = 0 [discarded]");
+ return 0;
+
+enqueue_and_ack:
+ __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+enqueue_packet:
+ /* defer the packet to the call's work item via call->rx_queue */
+ _net("defer skb %p", skb);
+ spin_unlock(&call->lock);
+ skb_queue_tail(&call->rx_queue, skb);
+ atomic_inc(&call->ackr_not_idle);
+ read_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_DEAD)
+ rxrpc_queue_call(call);
+ read_unlock(&call->state_lock);
+ _leave(" = 0 [queued]");
+ return 0;
+}
+
+/*
+ * treat the first reply packet seen by a client call as an implicit ACKALL
+ * covering everything sent during the call's transmission phase
+ * - does nothing unless the call is still awaiting its reply
+ * - 'serial' is recorded as the latest serial number to have ACK'd us
+ */
+static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
+{
+ write_lock_bh(&call->state_lock);
+
+ if (call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY) {
+ /* the call has already moved on; nothing to acknowledge */
+ write_unlock_bh(&call->state_lock);
+ return;
+ }
+
+ /* switch over to the reply reception phase */
+ call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+ call->acks_latest = serial;
+
+ _debug("implicit ACKALL %%%u", call->acks_latest);
+ set_bit(RXRPC_CALL_RCVD_ACKALL, &call->events);
+ write_unlock_bh(&call->state_lock);
+
+ /* if the resend timer couldn't be stopped, leave its events alone */
+ if (try_to_del_timer_sync(&call->resend_timer) < 0)
+ return;
+
+ /* timer is stopped: cancel any resend that was pending */
+ clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+ clear_bit(RXRPC_CALL_RESEND, &call->events);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+}
+
+/*
+ * post an incoming packet to the nominated call to deal with
+ * - must get rid of the sk_buff, either by freeing it or by queuing it
+ * - ABORT and BUSY packets change the call state here; DATA packets go
+ *   through rxrpc_fast_process_data(); ACK packets are queued for the call's
+ *   work item; any other type is treated as a protocol error
+ * - a protocol error locally aborts the call with RX_PROTOCOL_ERROR
+ */
+void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ __be32 _abort_code;
+ u32 serial, hi_serial, seq, abort_code;
+
+ _enter("%p,%p", call, skb);
+
+ ASSERT(!irqs_disabled());
+
+#if 0 // INJECT RX ERROR
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
+ static int skip = 0;
+ if (++skip == 3) {
+ printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n");
+ skip = 0;
+ goto free_packet;
+ }
+ }
+#endif
+
+ /* track the latest serial number on this connection for ACK packet
+ * information */
+ serial = ntohl(sp->hdr.serial);
+ hi_serial = atomic_read(&call->conn->hi_serial);
+ /* retry the compare-and-swap until the stored value is at least ours */
+ while (serial > hi_serial)
+ hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
+ serial);
+
+ /* request ACK generation for any ACK or DATA packet that requests
+ * it */
+ if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
+ _proto("ACK Requested on %%%u", serial);
+ rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial,
+ !(sp->hdr.flags & RXRPC_MORE_PACKETS));
+ }
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_ABORT:
+ _debug("abort");
+
+ /* the abort code is the first word of the packet payload */
+ if (skb_copy_bits(skb, 0, &_abort_code,
+ sizeof(_abort_code)) < 0)
+ goto protocol_error;
+
+ abort_code = ntohl(_abort_code);
+ _proto("Rx ABORT %%%u { %x }", serial, abort_code);
+
+ write_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_REMOTELY_ABORTED;
+ call->abort_code = abort_code;
+ set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+ goto free_packet_unlock;
+
+ case RXRPC_PACKET_TYPE_BUSY:
+ _proto("Rx BUSY %%%u", serial);
+
+ if (call->conn->out_clientflag)
+ goto protocol_error;
+
+ write_lock_bh(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ call->state = RXRPC_CALL_SERVER_BUSY;
+ set_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
+ rxrpc_queue_call(call);
+ /* no break: falls through to free the packet */
+ case RXRPC_CALL_SERVER_BUSY:
+ goto free_packet_unlock;
+ default:
+ goto protocol_error_locked;
+ }
+
+ default:
+ _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], serial);
+ goto protocol_error;
+
+ case RXRPC_PACKET_TYPE_DATA:
+ seq = ntohl(sp->hdr.seq);
+
+ _proto("Rx DATA %%%u { #%u }", serial, seq);
+
+ if (seq == 0)
+ goto protocol_error;
+
+ call->ackr_prev_seq = sp->hdr.seq;
+
+ /* received data implicitly ACKs all of the request packets we
+ * sent when we're acting as a client */
+ if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+ rxrpc_assume_implicit_ackall(call, serial);
+
+ switch (rxrpc_fast_process_data(call, skb, seq)) {
+ case 0:
+ /* the packet was consumed, so it must not be freed here */
+ skb = NULL;
+ goto done;
+
+ default:
+ /* any result other than 0 or -EBADMSG is unexpected */
+ BUG();
+
+ /* data packet received beyond the last packet */
+ case -EBADMSG:
+ goto protocol_error;
+ }
+
+ case RXRPC_PACKET_TYPE_ACK:
+ /* ACK processing is done in process context */
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_DEAD) {
+ skb_queue_tail(&call->rx_queue, skb);
+ rxrpc_queue_call(call);
+ skb = NULL;
+ }
+ read_unlock_bh(&call->state_lock);
+ /* skb is NULL here if it was queued - presumably rxrpc_free_skb()
+ * tolerates NULL; confirm */
+ goto free_packet;
+ }
+
+protocol_error:
+ _debug("protocol error");
+ write_lock_bh(&call->state_lock);
+protocol_error_locked:
+ /* entered with call->state_lock write-locked */
+ if (call->state <= RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = RX_PROTOCOL_ERROR;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+free_packet_unlock:
+ write_unlock_bh(&call->state_lock);
+free_packet:
+ rxrpc_free_skb(skb);
+done:
+ _leave("");
+}
+
+/*
+ * split up a jumbo data packet
+ * - each leading subpacket is peeled off as a clone trimmed to
+ *   RXRPC_JUMBO_DATALEN bytes and processed as an ordinary packet
+ * - the header kept with the jumbo packet is advanced (seq and serial
+ *   incremented, flags taken from the embedded jumbo header) for the next
+ *   subpacket
+ * - what is left of the jumbo packet itself is processed as the final
+ *   subpacket
+ * - consumes the jumbo packet on all paths
+ */
+static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
+ struct sk_buff *jumbo)
+{
+ struct rxrpc_jumbo_header jhdr;
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *part;
+
+ _enter(",{%u,%u}", jumbo->data_len, jumbo->len);
+
+ sp = rxrpc_skb(jumbo);
+
+ do {
+ sp->hdr.flags &= ~RXRPC_JUMBO_PACKET;
+
+ /* make a clone to represent the first subpacket in what's left
+ * of the jumbo packet */
+ part = skb_clone(jumbo, GFP_ATOMIC);
+ if (!part) {
+ /* simply ditch the tail in the event of ENOMEM */
+ pskb_trim(jumbo, RXRPC_JUMBO_DATALEN);
+ break;
+ }
+ rxrpc_new_skb(part);
+
+ pskb_trim(part, RXRPC_JUMBO_DATALEN);
+
+ /* step the jumbo packet past the subpacket just cloned off */
+ if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN))
+ goto protocol_error;
+
+ /* read, then step past, the header of the next subpacket */
+ if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0)
+ goto protocol_error;
+ if (!pskb_pull(jumbo, sizeof(jhdr)))
+ BUG();
+
+ /* update the jumbo packet's header to describe the next subpacket */
+ sp->hdr.seq = htonl(ntohl(sp->hdr.seq) + 1);
+ sp->hdr.serial = htonl(ntohl(sp->hdr.serial) + 1);
+ sp->hdr.flags = jhdr.flags;
+ sp->hdr._rsvd = jhdr._rsvd;
+
+ _proto("Rx DATA Jumbo %%%u", ntohl(sp->hdr.serial) - 1);
+
+ rxrpc_fast_process_packet(call, part);
+ part = NULL;
+
+ } while (sp->hdr.flags & RXRPC_JUMBO_PACKET);
+
+ /* whatever remains is the last subpacket */
+ rxrpc_fast_process_packet(call, jumbo);
+ _leave("");
+ return;
+
+protocol_error:
+ _debug("protocol error");
+ rxrpc_free_skb(part);
+ rxrpc_free_skb(jumbo);
+ write_lock_bh(&call->state_lock);
+ if (call->state <= RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = RX_PROTOCOL_ERROR;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+ write_unlock_bh(&call->state_lock);
+ _leave("");
+}
+
+/*
+ * post an incoming packet to the appropriate call/socket to deal with
+ * - must get rid of the sk_buff, either by freeing it or by queuing it
+ * - the call is looked up first in the connection's channel table and then
+ *   in its tree of calls
+ * - a packet for which no usable call is found is either queued as a new
+ *   incoming call or rejected with RX_CALL_DEAD
+ */
+static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_call *call;
+ struct rb_node *p;
+ __be32 call_id;
+
+ _enter("%p,%p", conn, skb);
+
+ read_lock_bh(&conn->lock);
+
+ sp = rxrpc_skb(skb);
+
+ /* look at extant calls by channel number first */
+ call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK];
+ if (!call || call->call_id != sp->hdr.callNumber)
+ goto call_not_extant;
+
+ _debug("extant call [%d]", call->state);
+ ASSERTCMP(call->conn, ==, conn);
+
+ read_lock(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+ rxrpc_queue_call(call);
+ /* no break: the packet is dropped as for the states below */
+ case RXRPC_CALL_REMOTELY_ABORTED:
+ case RXRPC_CALL_NETWORK_ERROR:
+ case RXRPC_CALL_DEAD:
+ goto free_unlock;
+ default:
+ break;
+ }
+
+ read_unlock(&call->state_lock);
+ /* hold the call across processing now that the locks are dropped */
+ rxrpc_get_call(call);
+ read_unlock_bh(&conn->lock);
+
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+ sp->hdr.flags & RXRPC_JUMBO_PACKET)
+ rxrpc_process_jumbo_packet(call, skb);
+ else
+ rxrpc_fast_process_packet(call, skb);
+
+ rxrpc_put_call(call);
+ goto done;
+
+call_not_extant:
+ /* search the completed calls in case what we're dealing with is
+ * there */
+ _debug("call not extant");
+
+ call_id = sp->hdr.callNumber;
+ p = conn->calls.rb_node;
+ while (p) {
+ call = rb_entry(p, struct rxrpc_call, conn_node);
+
+ if (call_id < call->call_id)
+ p = p->rb_left;
+ else if (call_id > call->call_id)
+ p = p->rb_right;
+ else
+ goto found_completed_call;
+ }
+
+dead_call:
+ /* it's either a really old call that we no longer remember or it's a
+ * new incoming call */
+ read_unlock_bh(&conn->lock);
+
+ /* the first packet of a client-initiated call goes to the acceptor */
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
+ sp->hdr.seq == __constant_cpu_to_be32(1)) {
+ _debug("incoming call");
+ skb_queue_tail(&conn->trans->local->accept_queue, skb);
+ rxrpc_queue_work(&conn->trans->local->acceptor);
+ goto done;
+ }
+
+ _debug("dead call");
+ skb->priority = RX_CALL_DEAD;
+ rxrpc_reject_packet(conn->trans->local, skb);
+ goto done;
+
+ /* resend last packet of a completed call
+ * - client calls may have been aborted or ACK'd
+ * - server calls may have been aborted
+ */
+found_completed_call:
+ /* conn->lock is still read-locked here */
+ _debug("completed call");
+
+ if (atomic_read(&call->usage) == 0)
+ goto dead_call;
+
+ /* synchronise any state changes */
+ read_lock(&call->state_lock);
+ ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK,
+ call->state, >=, RXRPC_CALL_COMPLETE);
+
+ if (call->state == RXRPC_CALL_LOCALLY_ABORTED ||
+ call->state == RXRPC_CALL_REMOTELY_ABORTED ||
+ call->state == RXRPC_CALL_DEAD) {
+ read_unlock(&call->state_lock);
+ goto dead_call;
+ }
+
+ if (call->conn->in_clientflag) {
+ read_unlock(&call->state_lock);
+ goto dead_call; /* complete server call */
+ }
+
+ /* ask the call's work item to send the final ACK again; a ref is taken
+ * first - presumably on behalf of the queued work, confirm where it is
+ * released */
+ _debug("final ack again");
+ rxrpc_get_call(call);
+ set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+ rxrpc_queue_call(call);
+
+free_unlock:
+ read_unlock(&call->state_lock);
+ read_unlock_bh(&conn->lock);
+ rxrpc_free_skb(skb);
+done:
+ _leave("");
+}
+
+/*
+ * post connection-level events to the connection
+ * - this includes challenges, responses and some aborts
+ * - the packet is appended to conn->rx_queue and the connection's work item
+ *   is queued to deal with it
+ */
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ _enter("%p,%p", conn, skb);
+
+ /* take a ref on the connection before queuing the work - presumably
+ * released by the connection processor; confirm */
+ atomic_inc(&conn->usage);
+ skb_queue_tail(&conn->rx_queue, skb);
+ rxrpc_queue_conn(conn);
+}
+
+/*
+ * handle data received on the local endpoint
+ * - may be called in interrupt context
+ * - receives one datagram from the UDP socket, extracts the RxRPC header
+ *   into the packet's private area and routes the packet to a connection or
+ *   call
+ * - packets that can't be routed are either queued as new incoming calls or
+ *   rejected with an abort code placed in skb->priority
+ */
+void rxrpc_data_ready(struct sock *sk, int count)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_transport *trans;
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_local *local;
+ struct rxrpc_peer *peer;
+ struct sk_buff *skb;
+ int ret;
+
+ _enter("%p, %d", sk, count);
+
+ ASSERT(!irqs_disabled());
+
+ /* pin the local endpoint, provided it is still in use */
+ read_lock_bh(&rxrpc_local_lock);
+ local = sk->sk_user_data;
+ if (local && atomic_read(&local->usage) > 0)
+ rxrpc_get_local(local);
+ else
+ local = NULL;
+ read_unlock_bh(&rxrpc_local_lock);
+ if (!local) {
+ _leave(" [local dead]");
+ return;
+ }
+
+ skb = skb_recv_datagram(sk, 0, 1, &ret);
+ if (!skb) {
+ rxrpc_put_local(local);
+ if (ret == -EAGAIN)
+ return;
+ _debug("UDP socket error %d", ret);
+ return;
+ }
+
+ rxrpc_new_skb(skb);
+
+ _net("recv skb %p", skb);
+
+ /* we'll probably need to checksum it (didn't call sock_recvmsg) */
+ if (skb_checksum_complete(skb)) {
+ rxrpc_free_skb(skb);
+ rxrpc_put_local(local);
+ _leave(" [CSUM failed]");
+ return;
+ }
+
+ /* the socket buffer we have is owned by UDP, with UDP's data all over
+ * it, but we really want our own */
+ skb_orphan(skb);
+ sp = rxrpc_skb(skb);
+ memset(sp, 0, sizeof(*sp));
+
+ _net("Rx UDP packet from %08x:%04hu",
+ ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
+
+ /* dig out the RxRPC connection details */
+ if (skb_copy_bits(skb, sizeof(struct udphdr), &sp->hdr,
+ sizeof(sp->hdr)) < 0)
+ goto bad_message;
+ /* the copy above succeeded, so stepping over the headers must work */
+ if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(sp->hdr)))
+ BUG();
+
+ _net("Rx RxRPC %s ep=%x call=%x:%x",
+ sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
+ ntohl(sp->hdr.epoch),
+ ntohl(sp->hdr.cid),
+ ntohl(sp->hdr.callNumber));
+
+ if (sp->hdr.type == 0 || sp->hdr.type >= RXRPC_N_PACKET_TYPES) {
+ _proto("Rx Bad Packet Type %u", sp->hdr.type);
+ goto bad_message;
+ }
+
+ /* a DATA packet must name both a call and a sequence number */
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+ (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
+ goto bad_message;
+
+ /* walk peer -> transport -> connection, dropping each intermediate ref
+ * once the next object has been looked up */
+ peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source);
+ if (IS_ERR(peer))
+ goto cant_route_call;
+
+ trans = rxrpc_find_transport(local, peer);
+ rxrpc_put_peer(peer);
+ if (!trans)
+ goto cant_route_call;
+
+ conn = rxrpc_find_connection(trans, &sp->hdr);
+ rxrpc_put_transport(trans);
+ if (!conn)
+ goto cant_route_call;
+
+ _debug("CONN %p {%d}", conn, conn->debug_id);
+
+ /* a zero call number marks a connection-level packet */
+ if (sp->hdr.callNumber == 0)
+ rxrpc_post_packet_to_conn(conn, skb);
+ else
+ rxrpc_post_packet_to_call(conn, skb);
+ rxrpc_put_connection(conn);
+ rxrpc_put_local(local);
+ return;
+
+cant_route_call:
+ _debug("can't route call");
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
+ sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
+ /* the first DATA packet from a client starts a new incoming call */
+ if (sp->hdr.seq == __constant_cpu_to_be32(1)) {
+ _debug("first packet");
+ skb_queue_tail(&local->accept_queue, skb);
+ rxrpc_queue_work(&local->acceptor);
+ rxrpc_put_local(local);
+ _leave(" [incoming]");
+ return;
+ }
+ skb->priority = RX_INVALID_OPERATION;
+ } else {
+ skb->priority = RX_CALL_DEAD;
+ }
+
+ _debug("reject");
+ rxrpc_reject_packet(local, skb);
+ rxrpc_put_local(local);
+ _leave(" [no call]");
+ return;
+
+bad_message:
+ skb->priority = RX_PROTOCOL_ERROR;
+ rxrpc_reject_packet(local, skb);
+ rxrpc_put_local(local);
+ _leave(" [badmsg]");
+}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
new file mode 100644
index 000000000000..58aaf892238e
--- /dev/null
+++ b/net/rxrpc/ar-internal.h
@@ -0,0 +1,808 @@
+/* AF_RXRPC internal definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <rxrpc/packet.h>
+
+#if 0 /* debug aid: BUG if the top two bytes of an atomic counter hold the free-poison pattern */
+#define CHECK_SLAB_OKAY(X) \
+ BUG_ON(atomic_read((X)) >> ((sizeof(atomic_t) - 2) * 8) == \
+ (POISON_FREE << 8 | POISON_FREE))
+#else /* normally compiled out */
+#define CHECK_SLAB_OKAY(X) do {} while(0)
+#endif
+
+#define FCRYPT_BSIZE 8 /* size in bytes of the x[] view of struct rxrpc_crypt */
+struct rxrpc_crypt {
+ union {
+ u8 x[FCRYPT_BSIZE]; /* the block as individual bytes */
+ u32 n[2]; /* the same 8 bytes as two 32-bit words */
+ };
+} __attribute__((aligned(8)));
+
+#define rxrpc_queue_work(WS) queue_work(rxrpc_workqueue, (WS)) /* queue WS on rxrpc_workqueue */
+#define rxrpc_queue_delayed_work(WS,D) \
+ queue_delayed_work(rxrpc_workqueue, (WS), (D)) /* as above, passing delay D through */
+
+#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor) /* queue a call's processor work item */
+#define rxrpc_queue_conn(CONN) rxrpc_queue_work(&(CONN)->processor) /* queue a connection's processor work item */
+
+/*
+ * sk_state for RxRPC sockets
+ */
+enum {
+ RXRPC_UNCONNECTED = 0,
+ RXRPC_CLIENT_BOUND, /* client local address bound */
+ RXRPC_CLIENT_CONNECTED, /* client is connected */
+ RXRPC_SERVER_BOUND, /* server local address bound */
+ RXRPC_SERVER_LISTENING, /* server listening for connections */
+ RXRPC_CLOSE, /* socket is being closed */
+};
+
+/*
+ * RxRPC socket definition
+ */
+struct rxrpc_sock {
+ /* WARNING: sk has to be the first member */
+ struct sock sk;
+ rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */
+ struct rxrpc_local *local; /* local endpoint */
+ struct rxrpc_transport *trans; /* transport handler */
+ struct rxrpc_conn_bundle *bundle; /* virtual connection bundle */
+ struct rxrpc_connection *conn; /* exclusive virtual connection */
+ struct list_head listen_link; /* link in the local endpoint's listen list */
+ struct list_head secureq; /* calls awaiting connection security clearance */
+ struct list_head acceptq; /* calls awaiting acceptance */
+ struct key *key; /* security for this socket */
+ struct key *securities; /* list of server security descriptors */
+ struct rb_root calls; /* outstanding calls on this socket */
+ unsigned long flags;
+#define RXRPC_SOCK_EXCLUSIVE_CONN 1 /* exclusive connection for a client socket */
+ rwlock_t call_lock; /* lock for calls */
+ u32 min_sec_level; /* minimum security level */
+#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
+ struct sockaddr_rxrpc srx; /* local address */
+ sa_family_t proto; /* protocol created with */
+ __be16 service_id; /* service ID of local/remote service */
+};
+
+#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
+
+/*
+ * RxRPC socket buffer private variables
+ * - overlaid on struct sk_buff::cb by rxrpc_skb(), so max 48 bytes
+ */
+struct rxrpc_skb_priv {
+ struct rxrpc_call *call; /* call with which associated */
+ unsigned long resend_at; /* time in jiffies at which to resend */
+ union { /* members share storage, so only one is meaningful at a time */
+ unsigned offset; /* offset into buffer of next read */
+ int remain; /* amount of space remaining for next write */
+ u32 error; /* network error code */
+ bool need_resend; /* T if needs resending */
+ };
+
+ struct rxrpc_header hdr; /* RxRPC packet header from this packet */
+};
+
+#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb) /* view an skb's cb area as rxrpc_skb_priv */
+
+enum rxrpc_command {
+ RXRPC_CMD_SEND_DATA, /* send data message */
+ RXRPC_CMD_SEND_ABORT, /* request abort generation */
+ RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
+ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
+};
+
+/*
+ * RxRPC security module interface
+ */
+struct rxrpc_security {
+ struct module *owner; /* providing module */
+ struct list_head link; /* link in master list */
+ const char *name; /* name of this service */
+ u8 security_index; /* security type provided */
+
+ /* initialise a connection's security */
+ int (*init_connection_security)(struct rxrpc_connection *);
+
+ /* prime a connection's packet security */
+ void (*prime_packet_security)(struct rxrpc_connection *);
+
+ /* impose security on a packet */
+ int (*secure_packet)(const struct rxrpc_call *,
+ struct sk_buff *,
+ size_t,
+ void *);
+
+ /* verify the security on a received packet */
+ int (*verify_packet)(const struct rxrpc_call *, struct sk_buff *,
+ u32 *);
+
+ /* issue a challenge */
+ int (*issue_challenge)(struct rxrpc_connection *);
+
+ /* respond to a challenge */
+ int (*respond_to_challenge)(struct rxrpc_connection *,
+ struct sk_buff *,
+ u32 *);
+
+ /* verify a response */
+ int (*verify_response)(struct rxrpc_connection *,
+ struct sk_buff *,
+ u32 *);
+
+ /* clear connection security */
+ void (*clear)(struct rxrpc_connection *);
+};
+
+/*
+ * RxRPC local transport endpoint definition
+ * - matched by local port, address and protocol type
+ */
+struct rxrpc_local {
+ struct socket *socket; /* my UDP socket */
+ struct work_struct destroyer; /* endpoint destroyer */
+ struct work_struct acceptor; /* incoming call processor */
+ struct work_struct rejecter; /* packet reject writer */
+ struct list_head services; /* services listening on this endpoint */
+ struct list_head link; /* link in endpoint list */
+ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
+ struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */
+ struct sk_buff_head reject_queue; /* packets awaiting rejection */
+ spinlock_t lock; /* access lock */
+ rwlock_t services_lock; /* lock for services list */
+ atomic_t usage;
+ int debug_id; /* debug ID for printks */
+ volatile char error_rcvd; /* T if received ICMP error outstanding */
+ struct sockaddr_rxrpc srx; /* local address */
+};
+
+/*
+ * RxRPC remote transport endpoint definition
+ * - matched by remote port, address and protocol type
+ * - holds the connection ID counter for connections between the two endpoints
+ */
+struct rxrpc_peer {
+ struct work_struct destroyer; /* peer destroyer */
+ struct list_head link; /* link in master peer list */
+ struct list_head error_targets; /* targets for net error distribution */
+ spinlock_t lock; /* access lock */
+ atomic_t usage;
+ unsigned if_mtu; /* interface MTU for this peer */
+ unsigned mtu; /* network MTU for this peer */
+ unsigned maxdata; /* data size (MTU - hdrsize) */
+ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */
+ int debug_id; /* debug ID for printks */
+ int net_error; /* network error distributed */
+ struct sockaddr_rxrpc srx; /* remote address */
+
+ /* calculated RTT cache */
+#define RXRPC_RTT_CACHE_SIZE 32
+ suseconds_t rtt; /* current RTT estimate (in uS) */
+ unsigned rtt_point; /* next entry at which to insert */
+ unsigned rtt_usage; /* amount of cache actually used */
+ suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
+};
+
+/*
+ * RxRPC point-to-point transport / connection manager definition
+ * - handles a bundle of connections between two endpoints
+ * - matched by { local, peer }
+ */
+struct rxrpc_transport {
+ struct rxrpc_local *local; /* local transport endpoint */
+ struct rxrpc_peer *peer; /* remote transport endpoint */
+ struct work_struct error_handler; /* network error distributor */
+ struct rb_root bundles; /* client connection bundles on this transport */
+ struct rb_root client_conns; /* client connections on this transport */
+ struct rb_root server_conns; /* server connections on this transport */
+ struct list_head link; /* link in master session list */
+ struct sk_buff_head error_queue; /* error packets awaiting processing */
+ time_t put_time; /* time at which to reap */
+ spinlock_t client_lock; /* client connection allocation lock */
+ rwlock_t conn_lock; /* lock for active/dead connections */
+ atomic_t usage;
+ int debug_id; /* debug ID for printks */
+ unsigned int conn_idcounter; /* connection ID counter (client) */
+};
+
+/*
+ * RxRPC client connection bundle
+ * - matched by { transport, service_id, key }
+ */
+struct rxrpc_conn_bundle {
+ struct rb_node node; /* node in transport's lookup tree */
+ struct list_head unused_conns; /* unused connections in this bundle */
+ struct list_head avail_conns; /* available connections in this bundle */
+ struct list_head busy_conns; /* busy connections in this bundle */
+ struct key *key; /* security for this bundle */
+ wait_queue_head_t chanwait; /* wait for channel to become available */
+ atomic_t usage;
+ int debug_id; /* debug ID for printks */
+ unsigned short num_conns; /* number of connections in this bundle */
+ __be16 service_id; /* service ID */
+ uint8_t security_ix; /* security type */
+};
+
+/*
+ * RxRPC connection definition
+ * - matched by { transport, service_id, conn_id, direction, key }
+ * - each connection can only handle four simultaneous calls
+ */
+struct rxrpc_connection {
+ struct rxrpc_transport *trans; /* transport session */
+ struct rxrpc_conn_bundle *bundle; /* connection bundle (client) */
+ struct work_struct processor; /* connection event processor */
+ struct rb_node node; /* node in transport's lookup tree */
+ struct list_head link; /* link in master connection list */
+ struct list_head bundle_link; /* link in bundle */
+ struct rb_root calls; /* calls on this connection */
+ struct sk_buff_head rx_queue; /* received conn-level packets */
+ struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* channels (active calls) */
+ struct rxrpc_security *security; /* applied security module */
+ struct key *key; /* security for this connection (client) */
+ struct key *server_key; /* security for this service */
+ struct crypto_blkcipher *cipher; /* encryption handle */
+ struct rxrpc_crypt csum_iv; /* packet checksum base */
+ unsigned long events;
+#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
+ time_t put_time; /* time at which to reap */
+ rwlock_t lock; /* access lock */
+ spinlock_t state_lock; /* state-change lock */
+ atomic_t usage;
+ u32 real_conn_id; /* connection ID (host-endian) */
+ enum { /* current state of connection */
+ RXRPC_CONN_UNUSED, /* - connection not yet attempted */
+ RXRPC_CONN_CLIENT, /* - client connection */
+ RXRPC_CONN_SERVER_UNSECURED, /* - server unsecured connection */
+ RXRPC_CONN_SERVER_CHALLENGING, /* - server challenging for security */
+ RXRPC_CONN_SERVER, /* - server secured connection */
+ RXRPC_CONN_REMOTELY_ABORTED, /* - conn aborted by peer */
+ RXRPC_CONN_LOCALLY_ABORTED, /* - conn aborted locally */
+ RXRPC_CONN_NETWORK_ERROR, /* - conn terminated by network error */
+ } state;
+ int error; /* error code for local abort */
+ int debug_id; /* debug ID for printks */
+ unsigned call_counter; /* call ID counter */
+ atomic_t serial; /* packet serial number counter */
+ atomic_t hi_serial; /* highest serial number received */
+ u8 avail_calls; /* number of calls available */
+ u8 size_align; /* data size alignment (for security) */
+ u8 header_size; /* rxrpc + security header size */
+ u8 security_size; /* security header size */
+ u32 security_level; /* security level negotiated */
+ u32 security_nonce; /* response re-use preventer */
+
+ /* the following are all in net order */
+ __be32 epoch; /* epoch of this connection */
+ __be32 cid; /* connection ID */
+ __be16 service_id; /* service ID */
+ u8 security_ix; /* security type */
+ u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
+ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
+};
+
+/*
+ * RxRPC call definition
+ * - matched by { connection, call_id }
+ */
+struct rxrpc_call {
+ struct rxrpc_connection *conn; /* connection carrying call */
+ struct rxrpc_sock *socket; /* socket responsible */
+ struct timer_list lifetimer; /* lifetime remaining on call */
+ struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */
+ struct timer_list ack_timer; /* ACK generation timer */
+ struct timer_list resend_timer; /* Tx resend timer */
+ struct work_struct destroyer; /* call destroyer */
+ struct work_struct processor; /* packet processor and ACK generator */
+ struct list_head link; /* link in master call list */
+ struct list_head error_link; /* link in error distribution list */
+ struct list_head accept_link; /* calls awaiting acceptance */
+ struct rb_node sock_node; /* node in socket call tree */
+ struct rb_node conn_node; /* node in connection call tree */
+ struct sk_buff_head rx_queue; /* received packets */
+ struct sk_buff_head rx_oos_queue; /* packets received out of sequence */
+ struct sk_buff *tx_pending; /* Tx socket buffer being filled */
+ wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */
+ unsigned long user_call_ID; /* user-defined call ID */
+ unsigned long creation_jif; /* time of call creation */
+ unsigned long flags; /* call flags; presumably a bitmap indexed by the numbers below - TODO confirm */
+#define RXRPC_CALL_RELEASED 0 /* call has been released - no more messages to userspace */
+#define RXRPC_CALL_TERMINAL_MSG 1 /* call has given the socket its final message */
+#define RXRPC_CALL_RCVD_LAST 2 /* all packets received */
+#define RXRPC_CALL_RUN_RTIMER 3 /* Tx resend timer started */
+#define RXRPC_CALL_TX_SOFT_ACK 4 /* sent some soft ACKs */
+#define RXRPC_CALL_PROC_BUSY 5 /* the processor is busy */
+#define RXRPC_CALL_INIT_ACCEPT 6 /* acceptance was initiated */
+#define RXRPC_CALL_HAS_USERID 7 /* has a user ID attached */
+#define RXRPC_CALL_EXPECT_OOS 8 /* expect out of sequence packets */
+ unsigned long events; /* event bitmap; the bit numbers below are used with set_bit() */
+#define RXRPC_CALL_RCVD_ACKALL 0 /* ACKALL or reply received */
+#define RXRPC_CALL_RCVD_BUSY 1 /* busy packet received */
+#define RXRPC_CALL_RCVD_ABORT 2 /* abort packet received */
+#define RXRPC_CALL_RCVD_ERROR 3 /* network error received */
+#define RXRPC_CALL_ACK_FINAL 4 /* need to generate final ACK (and release call) */
+#define RXRPC_CALL_ACK 5 /* need to generate ACK */
+#define RXRPC_CALL_REJECT_BUSY 6 /* need to generate busy message */
+#define RXRPC_CALL_ABORT 7 /* need to generate abort */
+#define RXRPC_CALL_CONN_ABORT 8 /* local connection abort generated */
+#define RXRPC_CALL_RESEND_TIMER 9 /* Tx resend timer expired */
+#define RXRPC_CALL_RESEND 10 /* Tx resend required */
+#define RXRPC_CALL_DRAIN_RX_OOS 11 /* drain the Rx out of sequence queue */
+#define RXRPC_CALL_LIFE_TIMER 12 /* call's lifetimer ran out */
+#define RXRPC_CALL_ACCEPTED 13 /* incoming call accepted by userspace app */
+#define RXRPC_CALL_SECURED 14 /* incoming call's connection is now secure */
+#define RXRPC_CALL_POST_ACCEPT 15 /* need to post an "accept?" message to the app */
+#define RXRPC_CALL_RELEASE 16 /* need to release the call's resources */
+
+ spinlock_t lock;
+ rwlock_t state_lock; /* lock for state transition */
+ atomic_t usage;
+ atomic_t sequence; /* Tx data packet sequence counter */
+ u32 abort_code; /* local/remote abort code */
+ enum { /* current state of call */
+ RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
+ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
+ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
+ RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */
+ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
+ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */
+ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
+ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
+ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
+ RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */
+ RXRPC_CALL_COMPLETE, /* - call completed */
+ RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */
+ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
+ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
+ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
+ RXRPC_CALL_DEAD, /* - call is dead */
+ } state;
+ int debug_id; /* debug ID for printks */
+ u8 channel; /* connection channel occupied by this call */
+
+ /* transmission-phase ACK management */
+ uint8_t acks_head; /* offset into window of first entry */
+ uint8_t acks_tail; /* offset into window of last entry */
+ uint8_t acks_winsz; /* size of un-ACK'd window */
+ uint8_t acks_unacked; /* lowest unacked packet in last ACK received */
+ int acks_latest; /* serial number of latest ACK received */
+ rxrpc_seq_t acks_hard; /* highest definitively ACK'd msg seq */
+ unsigned long *acks_window; /* sent packet window
+ * - elements are pointers with LSB set if ACK'd
+ */
+
+ /* receive-phase ACK management */
+ rxrpc_seq_t rx_data_expect; /* next data seq ID expected to be received */
+ rxrpc_seq_t rx_data_post; /* next data seq ID expected to be posted */
+ rxrpc_seq_t rx_data_recv; /* last data seq ID encountered by recvmsg */
+ rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */
+ rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */
+ rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */
+ rxrpc_seq_net_t ackr_prev_seq; /* previous sequence number received */
+ uint8_t ackr_reason; /* reason to ACK */
+ __be32 ackr_serial; /* serial of packet being ACK'd */
+ atomic_t ackr_not_idle; /* number of packets in Rx queue */
+
+ /* received packet records, 1 bit per record */
+#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
+ unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
+
+ /* the following should all be in net order */
+ __be32 cid; /* connection ID + channel index */
+ __be32 call_id; /* call ID on connection */
+};
+
+/*
+ * RxRPC key for Kerberos (type-2 security)
+ */
+struct rxkad_key {
+ u16 security_index; /* RxRPC header security index */
+ u16 ticket_len; /* length of ticket[] */
+ u32 expiry; /* time at which the key expires */
+ u32 kvno; /* key version number */
+ u8 session_key[8]; /* DES session key */
+ u8 ticket[0]; /* the encrypted ticket (ticket_len bytes, trailing the fixed fields) */
+};
+
+struct rxrpc_key_payload {
+ struct rxkad_key k;
+};
+
+/*
+ * locally abort an RxRPC call: record abort_code and flag RXRPC_CALL_ABORT, unless state >= RXRPC_CALL_COMPLETE
+ */
+static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
+{
+ write_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE) { /* states at or after COMPLETE are left untouched */
+ call->abort_code = abort_code;
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ set_bit(RXRPC_CALL_ABORT, &call->events); /* event: need to generate abort */
+ }
+ write_unlock_bh(&call->state_lock);
+}
+
+/*
+ * af_rxrpc.c
+ */
+extern atomic_t rxrpc_n_skbs;
+extern __be32 rxrpc_epoch;
+extern atomic_t rxrpc_debug_id;
+extern struct workqueue_struct *rxrpc_workqueue;
+
+/*
+ * ar-accept.c
+ */
+extern void rxrpc_accept_incoming_calls(struct work_struct *);
+extern struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *,
+ unsigned long);
+extern int rxrpc_reject_call(struct rxrpc_sock *);
+
+/*
+ * ar-ack.c
+ */
+extern void __rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
+extern void rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
+extern void rxrpc_process_call(struct work_struct *);
+
+/*
+ * ar-call.c
+ */
+extern struct kmem_cache *rxrpc_call_jar;
+extern struct list_head rxrpc_calls;
+extern rwlock_t rxrpc_call_lock;
+
+extern struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
+ struct rxrpc_transport *,
+ struct rxrpc_conn_bundle *,
+ unsigned long, int, gfp_t);
+extern struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
+ struct rxrpc_connection *,
+ struct rxrpc_header *, gfp_t);
+extern struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *,
+ unsigned long);
+extern void rxrpc_release_call(struct rxrpc_call *);
+extern void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
+extern void __rxrpc_put_call(struct rxrpc_call *);
+extern void __exit rxrpc_destroy_all_calls(void);
+
+/*
+ * ar-connection.c
+ */
+extern struct list_head rxrpc_connections;
+extern rwlock_t rxrpc_connection_lock;
+
+extern struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
+ struct rxrpc_transport *,
+ struct key *,
+ __be16, gfp_t);
+extern void rxrpc_put_bundle(struct rxrpc_transport *,
+ struct rxrpc_conn_bundle *);
+extern int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
+ struct rxrpc_conn_bundle *, struct rxrpc_call *,
+ gfp_t);
+extern void rxrpc_put_connection(struct rxrpc_connection *);
+extern void __exit rxrpc_destroy_all_connections(void);
+extern struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
+ struct rxrpc_header *);
+extern struct rxrpc_connection *
+rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *,
+ gfp_t);
+
+/*
+ * ar-connevent.c
+ */
+extern void rxrpc_process_connection(struct work_struct *);
+extern void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
+extern void rxrpc_reject_packets(struct work_struct *);
+
+/*
+ * ar-error.c
+ */
+extern void rxrpc_UDP_error_report(struct sock *);
+extern void rxrpc_UDP_error_handler(struct work_struct *);
+
+/*
+ * ar-input.c
+ */
+extern unsigned long rxrpc_ack_timeout;
+extern const char *rxrpc_pkts[];
+
+extern void rxrpc_data_ready(struct sock *, int);
+extern int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool,
+ bool);
+extern void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
+
+/*
+ * ar-local.c
+ */
+extern rwlock_t rxrpc_local_lock;
+extern struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
+extern void rxrpc_put_local(struct rxrpc_local *);
+extern void __exit rxrpc_destroy_all_locals(void);
+
+/*
+ * ar-key.c
+ */
+extern struct key_type key_type_rxrpc;
+extern struct key_type key_type_rxrpc_s;
+
+extern int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
+extern int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
+extern int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *,
+ time_t, u32);
+
+/*
+ * ar-output.c
+ */
+extern int rxrpc_resend_timeout;
+
+extern int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
+extern int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
+ struct rxrpc_transport *, struct msghdr *,
+ size_t);
+extern int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *,
+ struct msghdr *, size_t);
+
+/*
+ * ar-peer.c
+ */
+extern struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t);
+extern void rxrpc_put_peer(struct rxrpc_peer *);
+extern struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *,
+ __be32, __be16);
+extern void __exit rxrpc_destroy_all_peers(void);
+
+/*
+ * ar-proc.c
+ */
+extern const char *rxrpc_call_states[];
+extern struct file_operations rxrpc_call_seq_fops;
+extern struct file_operations rxrpc_connection_seq_fops;
+
+/*
+ * ar-recvmsg.c
+ */
+extern void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
+extern int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *,
+ size_t, int);
+
+/*
+ * ar-security.c
+ */
+extern int rxrpc_register_security(struct rxrpc_security *);
+extern void rxrpc_unregister_security(struct rxrpc_security *);
+extern int rxrpc_init_client_conn_security(struct rxrpc_connection *);
+extern int rxrpc_init_server_conn_security(struct rxrpc_connection *);
+extern int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *,
+ size_t, void *);
+extern int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *,
+ u32 *);
+extern void rxrpc_clear_conn_security(struct rxrpc_connection *);
+
+/*
+ * ar-skbuff.c
+ */
+extern void rxrpc_packet_destructor(struct sk_buff *);
+
+/*
+ * ar-transport.c
+ */
+extern struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
+ struct rxrpc_peer *,
+ gfp_t);
+extern void rxrpc_put_transport(struct rxrpc_transport *);
+extern void __exit rxrpc_destroy_all_transports(void);
+extern struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
+ struct rxrpc_peer *);
+
+/*
+ * debug tracing
+ */
+extern unsigned rxrpc_debug;
+
+#define dbgprintk(FMT,...) \
+ printk("[%x%-6.6s] "FMT"\n", smp_processor_id(), current->comm ,##__VA_ARGS__) /* prefix: CPU number (hex) + task name */
+
+/* no-op with a printf format attribute: keeps format strings checked even when debugging is disabled */
+static inline __attribute__((format(printf,1,2)))
+void _dbprintk(const char *fmt, ...)
+{
+}
+
+#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) /* function entry, with arguments */
+#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) /* function exit */
+#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
+#define kproto(FMT,...) dbgprintk("### "FMT ,##__VA_ARGS__)
+#define knet(FMT,...) dbgprintk("@@@ "FMT ,##__VA_ARGS__)
+
+
+#if defined(__KDEBUG) /* every trace class unconditionally enabled */
+#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
+#define _proto(FMT,...) kproto(FMT,##__VA_ARGS__)
+#define _net(FMT,...) knet(FMT,##__VA_ARGS__)
+
+#elif defined(CONFIG_AF_RXRPC_DEBUG) /* each trace class gated at run time by a bit in rxrpc_debug */
+#define RXRPC_DEBUG_KENTER 0x01
+#define RXRPC_DEBUG_KLEAVE 0x02
+#define RXRPC_DEBUG_KDEBUG 0x04
+#define RXRPC_DEBUG_KPROTO 0x08
+#define RXRPC_DEBUG_KNET 0x10
+
+#define _enter(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KENTER)) \
+ kenter(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _leave(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KLEAVE)) \
+ kleave(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _debug(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KDEBUG)) \
+ kdebug(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _proto(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO)) \
+ kproto(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _net(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET)) \
+ knet(FMT,##__VA_ARGS__); \
+} while (0)
+
+#else /* tracing compiled out: calls go to the empty _dbprintk(), which still type-checks the formats */
+#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
+#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
+#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
+#define _proto(FMT,...) _dbprintk("### "FMT ,##__VA_ARGS__)
+#define _net(FMT,...) _dbprintk("@@@ "FMT ,##__VA_ARGS__)
+#endif
+
+/*
+ * debug assertion checking (always compiled in: the __KDEBUGALL test below is commented out)
+ */
+#if 1 // defined(__KDEBUGALL)
+
+#define ASSERT(X) /* BUG unless X holds */ \
+do { \
+ if (unlikely(!(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTCMP(X, OP, Y) /* BUG unless (X OP Y); X and Y are evaluated again for the report */ \
+do { \
+ if (unlikely(!((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ printk(KERN_ERR "%lu " #OP " %lu is false\n", /* operands in decimal */ \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", /* and again in hex */ \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTIF(C, X) /* BUG if C is true and X does not hold */ \
+do { \
+ if (unlikely((C) && !(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y) /* BUG if C is true and (X OP Y) is false; X and Y re-evaluated for the report */ \
+do { \
+ if (unlikely((C) && !((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ printk(KERN_ERR "%lu " #OP " %lu is false\n", /* operands in decimal */ \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", /* and again in hex */ \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while(0)
+
+#else /* assertions compiled out: arguments are not evaluated */
+
+#define ASSERT(X) \
+do { \
+} while(0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+} while(0)
+
+#define ASSERTIF(C, X) \
+do { \
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+} while(0)
+
+#endif /* __KDEBUGALL */
+
+/*
+ * socket buffer accounting / leak finding
+ */
+static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn)
+{ /* the accounting below is commented out, so this currently does nothing */
+ //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
+ //atomic_inc(&rxrpc_n_skbs);
+}
+
+#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__) /* passes the calling function's name as fn */
+
+static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn)
+{ /* the accounting below is commented out, so this currently does nothing */
+ //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
+ //atomic_dec(&rxrpc_n_skbs);
+}
+
+#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__) /* passes the calling function's name as fn */
+
+static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn)
+{ /* free an skb, tolerating NULL; fn is only referenced by the disabled tracing */
+ if (!skb)
+ return;
+ CHECK_SLAB_OKAY(&skb->users);
+ //_net("free skb %p %s [%d]",
+ // skb, fn, atomic_read(&rxrpc_n_skbs));
+ //atomic_dec(&rxrpc_n_skbs);
+ kfree_skb(skb);
+}
+
+#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__)
+
+static inline void rxrpc_purge_queue(struct sk_buff_head *list)
+{ /* dequeue and free buffers until the list is empty */
+ struct sk_buff *next;
+ while ((next = skb_dequeue(list)))
+ rxrpc_free_skb(next);
+}
+
+static inline void __rxrpc_get_local(struct rxrpc_local *local, const char *f)
+{ /* bump the endpoint's usage count; f is printed if the count had been 0 */
+ CHECK_SLAB_OKAY(&local->usage);
+ if (atomic_inc_return(&local->usage) == 1) /* new value 1 => it was 0 before this get */
+ printk("resurrected (%s)\n", f);
+}
+
+#define rxrpc_get_local(LOCAL) __rxrpc_get_local((LOCAL), __func__) /* passes the calling function's name as f */
+
+#define rxrpc_get_call(CALL) /* bump the call's usage count; BUG if it had been 0 */ \
+do { \
+ CHECK_SLAB_OKAY(&(CALL)->usage); \
+ if (atomic_inc_return(&(CALL)->usage) == 1) /* new value 1 => count was 0 */ \
+ BUG(); \
+} while(0)
+
+#define rxrpc_put_call(CALL) /* statement wrapper around __rxrpc_put_call() */ \
+do { \
+ __rxrpc_put_call(CALL); \
+} while(0)
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
new file mode 100644
index 000000000000..7e049ff6ae60
--- /dev/null
+++ b/net/rxrpc/ar-key.c
@@ -0,0 +1,334 @@
+/* RxRPC key management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * RxRPC keys should have a description describing their purpose:
+ * "afs@CAMBRIDGE.REDHAT.COM"
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/key.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <keys/rxrpc-type.h>
+#include <keys/user-type.h>
+#include "ar-internal.h"
+
+static int rxrpc_instantiate(struct key *, const void *, size_t);
+static int rxrpc_instantiate_s(struct key *, const void *, size_t);
+static void rxrpc_destroy(struct key *);
+static void rxrpc_destroy_s(struct key *);
+static void rxrpc_describe(const struct key *, struct seq_file *);
+
+/*
+ * rxrpc defined keys take an arbitrary string as the description and an
+ * arbitrary blob of data as the payload
+ * - the payload blob is parsed by rxrpc_instantiate() and freed again by
+ * rxrpc_destroy()
+ * - the key type is exported for the benefit of other modules
+ */
+struct key_type key_type_rxrpc = {
+ .name = "rxrpc",
+ .instantiate = rxrpc_instantiate,
+ .match = user_match,
+ .destroy = rxrpc_destroy,
+ .describe = rxrpc_describe,
+};
+
+EXPORT_SYMBOL(key_type_rxrpc);
+
+/*
+ * rxrpc server defined keys take "<serviceId>:<securityIndex>" as the
+ * description and an 8-byte decryption key as the payload
+ * - rxrpc_instantiate_s() turns the payload into a keyed cipher, which
+ * rxrpc_destroy_s() later frees
+ */
+struct key_type key_type_rxrpc_s = {
+ .name = "rxrpc_s",
+ .instantiate = rxrpc_instantiate_s,
+ .match = user_match,
+ .destroy = rxrpc_destroy_s,
+ .describe = rxrpc_describe,
+};
+
+/*
+ * instantiate an rxrpc defined key
+ * data should be of the form:
+ *	OFFSET	LEN	CONTENT
+ *	0	4	key interface version number
+ *	4	2	security index (type)
+ *	6	2	ticket length
+ *	8	4	key expiry time (time_t)
+ *	12	4	kvno
+ *	16	8	session key
+ *	24	[len]	ticket
+ *
+ * if no data is provided, then a no-security key is made
+ *
+ * returns 0 on success, -EINVAL if the blob is malformed, -EKEYREJECTED if the
+ * interface version is not 1, -EPROTONOSUPPORT if the security index is not 2,
+ * -ENOMEM if the payload cannot be allocated, or whatever error
+ * key_payload_reserve() gives
+ */
+static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
+{
+	const struct rxkad_key *v1;
+	struct rxrpc_key_payload *payload;
+	size_t payload_size;
+	u32 version;
+	int err;
+
+	_enter("{%x},,%zu", key_serial(key), datalen);
+
+	/* handle a no-security key */
+	if (!data && datalen == 0)
+		return 0;
+
+	/* get the key interface version number */
+	if (datalen <= 4 || !data)
+		return -EINVAL;
+	memcpy(&version, data, sizeof(version));
+	data += sizeof(version);
+	datalen -= sizeof(version);
+
+	_debug("KEY I/F VERSION: %u", version);
+
+	if (version != 1)
+		return -EKEYREJECTED;
+
+	/* deal with a version 1 key: the fixed part must be wholly present and
+	 * the remainder must be exactly as long as the ticket length says */
+	if (datalen < sizeof(*v1))
+		return -EINVAL;
+
+	v1 = data;
+	if (datalen != sizeof(*v1) + v1->ticket_len)
+		return -EINVAL;
+
+	_debug("SCIX: %u", v1->security_index);
+	_debug("TLEN: %u", v1->ticket_len);
+	_debug("EXPY: %x", v1->expiry);
+	_debug("KVNO: %u", v1->kvno);
+	_debug("SKEY: %02x%02x%02x%02x%02x%02x%02x%02x",
+	       v1->session_key[0], v1->session_key[1],
+	       v1->session_key[2], v1->session_key[3],
+	       v1->session_key[4], v1->session_key[5],
+	       v1->session_key[6], v1->session_key[7]);
+	if (v1->ticket_len >= 8)
+		_debug("TCKT: %02x%02x%02x%02x%02x%02x%02x%02x",
+		       v1->ticket[0], v1->ticket[1],
+		       v1->ticket[2], v1->ticket[3],
+		       v1->ticket[4], v1->ticket[5],
+		       v1->ticket[6], v1->ticket[7]);
+
+	if (v1->security_index != 2)
+		return -EPROTONOSUPPORT;
+
+	key->type_data.x[0] = v1->security_index;
+
+	payload_size = sizeof(*payload) + v1->ticket_len;
+	err = key_payload_reserve(key, payload_size);
+	if (err < 0)
+		return err;
+
+	payload = kmalloc(payload_size, GFP_KERNEL);
+	if (!payload)
+		return -ENOMEM;
+
+	/* attach the data: the fixed part first, then the ticket behind it */
+	memcpy(&payload->k, v1, sizeof(*v1));
+	memcpy(&payload->k.ticket, (void *)v1 + sizeof(*v1),
+	       v1->ticket_len);
+	key->payload.data = payload;
+	key->expiry = v1->expiry;
+	return 0;
+}
+
+/*
+ * instantiate a server secret key
+ * data should be a pointer to the 8-byte secret key
+ * - the secret is copied into key->type_data and is also used to key a
+ *   "pcbc(des)" cipher, which becomes the key's payload
+ * - returns 0 on success, -EINVAL if the data is not 8 bytes long or the error
+ *   from the cipher allocation
+ */
+static int rxrpc_instantiate_s(struct key *key, const void *data,
+			       size_t datalen)
+{
+	struct crypto_blkcipher *cipher;
+	long err;
+
+	_enter("{%x},,%zu", key_serial(key), datalen);
+
+	if (datalen != 8)
+		return -EINVAL;
+
+	memcpy(&key->type_data, data, 8);
+
+	cipher = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cipher)) {
+		err = PTR_ERR(cipher);
+		_leave(" = %ld", err);
+		return err;
+	}
+
+	/* failing to set the key on the cipher is treated as a bug */
+	if (crypto_blkcipher_setkey(cipher, data, 8) < 0)
+		BUG();
+
+	key->payload.data = cipher;
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * dispose of the data dangling from the corpse of a rxrpc key
+ * - the payload is the buffer that rxrpc_instantiate() kmalloc'd, if any
+ */
+static void rxrpc_destroy(struct key *key)
+{
+	void *payload = key->payload.data;
+
+	kfree(payload);
+}
+
+/*
+ * dispose of the data dangling from the corpse of a rxrpc server key
+ * - the payload is the cipher that rxrpc_instantiate_s() allocated, if any
+ */
+static void rxrpc_destroy_s(struct key *key)
+{
+	if (!key->payload.data)
+		return;
+
+	crypto_free_blkcipher(key->payload.data);
+	key->payload.data = NULL;
+}
+
+/*
+ * describe the rxrpc key
+ * - just writes the key's description string to the seq_file
+ * - used as the describe operation of both rxrpc key types in this file
+ */
+static void rxrpc_describe(const struct key *key, struct seq_file *m)
+{
+ seq_puts(m, key->description);
+}
+
+/*
+ * grab the security key for a socket
+ * - optval/optlen give the key description in userspace; it must be between 1
+ *   and PAGE_SIZE - 1 bytes long and need not be NUL-terminated
+ * - the key found is attached to the socket as rx->key
+ * - returns 0 on success, -EINVAL for a bad length, -ENOMEM, -EFAULT or the
+ *   error from request_key()
+ */
+int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
+{
+	struct key *key;
+	char *name;
+	long err;
+
+	_enter("");
+
+	if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+		return -EINVAL;
+
+	/* take a NUL-terminated copy of the description */
+	name = kmalloc(optlen + 1, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+
+	if (copy_from_user(name, optval, optlen)) {
+		kfree(name);
+		return -EFAULT;
+	}
+	name[optlen] = 0;
+
+	/* the copy is only needed for the duration of the search */
+	key = request_key(&key_type_rxrpc, name, NULL);
+	kfree(name);
+
+	if (IS_ERR(key)) {
+		err = PTR_ERR(key);
+		_leave(" = %ld", err);
+		return err;
+	}
+
+	rx->key = key;
+	_leave(" = 0 [key %x]", key->serial);
+	return 0;
+}
+
+/*
+ * grab the security keyring for a server socket
+ * - optval/optlen give the keyring description in userspace; it must be
+ *   between 1 and PAGE_SIZE - 1 bytes long and need not be NUL-terminated
+ * - the keyring found is attached to the socket as rx->securities
+ * - returns 0 on success, -EINVAL for a bad length, -ENOMEM, -EFAULT or the
+ *   error from request_key()
+ */
+int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
+			 int optlen)
+{
+	struct key *keyring;
+	char *name;
+	long err;
+
+	_enter("");
+
+	if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+		return -EINVAL;
+
+	/* take a NUL-terminated copy of the description */
+	name = kmalloc(optlen + 1, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+
+	if (copy_from_user(name, optval, optlen)) {
+		kfree(name);
+		return -EFAULT;
+	}
+	name[optlen] = 0;
+
+	/* the copy is only needed for the duration of the search */
+	keyring = request_key(&key_type_keyring, name, NULL);
+	kfree(name);
+
+	if (IS_ERR(keyring)) {
+		err = PTR_ERR(keyring);
+		_leave(" = %ld", err);
+		return err;
+	}
+
+	rx->securities = keyring;
+	_leave(" = 0 [key %x]", keyring->serial);
+	return 0;
+}
+
+/*
+ * generate a server data key
+ * - allocates an rxrpc-type key with KEY_ALLOC_NOT_IN_QUOTA, instantiates it
+ * with a version 1 payload (security index 2, no ticket) carrying the given
+ * session key and expiry time, and attaches it to the connection as
+ * conn->key
+ * - returns 0 on success and -ENOMEM on any failure, whatever the underlying
+ * error actually was
+ * - NOTE(review): the kvno argument is not used; the payload's kvno is always
+ * set to 0 - confirm that this is intended
+ */
+int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
+ const void *session_key,
+ time_t expiry,
+ u32 kvno)
+{
+ struct key *key;
+ int ret;
+
+ struct {
+ u32 kver;
+ struct rxkad_key tsec;
+ } data; /* same layout as the blob that rxrpc_instantiate() parses */
+
+ _enter("");
+
+ key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0,
+ KEY_ALLOC_NOT_IN_QUOTA);
+ if (IS_ERR(key)) {
+ _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
+ return -ENOMEM;
+ }
+
+ _debug("key %d", key_serial(key));
+
+ data.kver = 1;
+ data.tsec.security_index = 2;
+ data.tsec.ticket_len = 0;
+ data.tsec.expiry = expiry;
+ data.tsec.kvno = 0;
+
+ memcpy(&data.tsec.session_key, session_key,
+ sizeof(data.tsec.session_key));
+
+ ret = key_instantiate_and_link(key, &data, sizeof(data), NULL, NULL);
+ if (ret < 0)
+ goto error;
+
+ conn->key = key;
+ _leave(" = 0 [%d]", key_serial(key));
+ return 0;
+
+error:
+ key_revoke(key);
+ key_put(key);
+ _leave(" = -ENOMEM [ins %d]", ret); /* the real error is only traced */
+ return -ENOMEM;
+}
+
+EXPORT_SYMBOL(rxrpc_get_server_data_key);
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
new file mode 100644
index 000000000000..fe03f71f17da
--- /dev/null
+++ b/net/rxrpc/ar-local.c
@@ -0,0 +1,309 @@
+/* AF_RXRPC local endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_locals); /* local endpoints, linked through ->link */
+DEFINE_RWLOCK(rxrpc_local_lock); /* guards rxrpc_locals and the final put */
+static DECLARE_RWSEM(rxrpc_local_sem); /* held over lookup/creation and destruction */
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq); /* woken when rxrpc_locals empties */
+
+static void rxrpc_destroy_local(struct work_struct *work);
+
+/*
+ * allocate a new local
+ * - the record is zeroed, its work items, lists, queues and locks are
+ *   initialised, its usage count is set to 1 and the address is copied in
+ * - returns the new record or NULL if out of memory
+ */
+static
+struct rxrpc_local *rxrpc_alloc_local(struct sockaddr_rxrpc *srx)
+{
+	struct rxrpc_local *local = kzalloc(sizeof(*local), GFP_KERNEL);
+
+	if (!local)
+		goto out;
+
+	/* deferred work handlers */
+	INIT_WORK(&local->destroyer, &rxrpc_destroy_local);
+	INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls);
+	INIT_WORK(&local->rejecter, &rxrpc_reject_packets);
+
+	/* lists, queues and locks */
+	INIT_LIST_HEAD(&local->services);
+	INIT_LIST_HEAD(&local->link);
+	init_rwsem(&local->defrag_sem);
+	skb_queue_head_init(&local->accept_queue);
+	skb_queue_head_init(&local->reject_queue);
+	spin_lock_init(&local->lock);
+	rwlock_init(&local->services_lock);
+
+	/* the caller gets the initial reference */
+	atomic_set(&local->usage, 1);
+	local->debug_id = atomic_inc_return(&rxrpc_debug_id);
+	memcpy(&local->srx, srx, sizeof(*srx));
+
+out:
+	_leave(" = %p", local);
+	return local;
+}
+
+/*
+ * create the local socket
+ * - must be called with rxrpc_local_sem writelocked
+ * - creates a kernel UDP socket for the endpoint, binds it if an address was
+ * supplied, asks for ICMP errors and for the don't fragment bit to be set,
+ * adds the endpoint to rxrpc_locals and installs the rxrpc callbacks on the
+ * socket
+ * - returns 0 on success or a negative error code; if the failure happens
+ * after the socket was created, the socket is shut down and released and
+ * local->socket is cleared
+ */
+static int rxrpc_create_local(struct rxrpc_local *local)
+{
+ struct sock *sock;
+ int ret, opt;
+
+ _enter("%p{%d}", local, local->srx.transport_type);
+
+ /* create a socket to represent the local endpoint */
+ ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP,
+ &local->socket);
+ if (ret < 0) {
+ _leave(" = %d [socket]", ret);
+ return ret; /* nothing to clean up yet */
+ }
+
+ /* if a local address was supplied then bind it */
+ if (local->srx.transport_len > sizeof(sa_family_t)) {
+ _debug("bind");
+ ret = kernel_bind(local->socket,
+ (struct sockaddr *) &local->srx.transport,
+ local->srx.transport_len);
+ if (ret < 0) {
+ _debug("bind failed");
+ goto error;
+ }
+ }
+
+ /* we want to receive ICMP errors */
+ opt = 1;
+ ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
+
+ /* we want to set the don't fragment bit */
+ opt = IP_PMTUDISC_DO;
+ ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
+
+ write_lock_bh(&rxrpc_local_lock);
+ list_add(&local->link, &rxrpc_locals);
+ write_unlock_bh(&rxrpc_local_lock);
+
+ /* set the socket up */
+ sock = local->socket->sk;
+ sock->sk_user_data = local;
+ sock->sk_data_ready = rxrpc_data_ready;
+ sock->sk_error_report = rxrpc_UDP_error_report;
+ _leave(" = 0");
+ return 0;
+
+error:
+ local->socket->ops->shutdown(local->socket, 2);
+ local->socket->sk->sk_user_data = NULL;
+ sock_release(local->socket);
+ local->socket = NULL;
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * look up the local endpoint for the specified UDP address, creating a new
+ * one if no matching endpoint exists yet
+ * - an existing endpoint matches if its transport type, address family, port
+ * and IPv4 address are all the same as those in srx
+ * - the comparison only understands AF_INET addresses; any other family that
+ * gets as far as being compared hits BUG()
+ * - returns the endpoint with a reference held for the caller, or an ERR_PTR
+ * if a new endpoint could not be allocated or its socket could not be set up
+ */
+struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_local *local;
+ int ret;
+
+ _enter("{%d,%u,%u.%u.%u.%u+%hu}",
+ srx->transport_type,
+ srx->transport.family,
+ NIPQUAD(srx->transport.sin.sin_addr),
+ ntohs(srx->transport.sin.sin_port));
+
+ down_write(&rxrpc_local_sem);
+
+ /* see if we have a suitable local endpoint already */
+ read_lock_bh(&rxrpc_local_lock);
+
+ list_for_each_entry(local, &rxrpc_locals, link) {
+ _debug("CMP {%d,%u,%u.%u.%u.%u+%hu}",
+ local->srx.transport_type,
+ local->srx.transport.family,
+ NIPQUAD(local->srx.transport.sin.sin_addr),
+ ntohs(local->srx.transport.sin.sin_port));
+
+ if (local->srx.transport_type != srx->transport_type ||
+ local->srx.transport.family != srx->transport.family)
+ continue;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ if (local->srx.transport.sin.sin_port !=
+ srx->transport.sin.sin_port)
+ continue;
+ if (memcmp(&local->srx.transport.sin.sin_addr,
+ &srx->transport.sin.sin_addr,
+ sizeof(struct in_addr)) != 0)
+ continue;
+ goto found_local; /* still holding both locks */
+
+ default:
+ BUG();
+ }
+ }
+
+ read_unlock_bh(&rxrpc_local_lock);
+
+ /* we didn't find one, so we need to create one */
+ local = rxrpc_alloc_local(srx);
+ if (!local) {
+ up_write(&rxrpc_local_sem);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = rxrpc_create_local(local);
+ if (ret < 0) {
+ up_write(&rxrpc_local_sem);
+ kfree(local);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+ }
+
+ up_write(&rxrpc_local_sem);
+
+ _net("LOCAL new %d {%d,%u,%u.%u.%u.%u+%hu}",
+ local->debug_id,
+ local->srx.transport_type,
+ local->srx.transport.family,
+ NIPQUAD(local->srx.transport.sin.sin_addr),
+ ntohs(local->srx.transport.sin.sin_port));
+
+ _leave(" = %p [new]", local);
+ return local;
+
+found_local:
+ rxrpc_get_local(local); /* taken before the list lock is dropped */
+ read_unlock_bh(&rxrpc_local_lock);
+ up_write(&rxrpc_local_sem);
+
+ _net("LOCAL old %d {%d,%u,%u.%u.%u.%u+%hu}",
+ local->debug_id,
+ local->srx.transport_type,
+ local->srx.transport.family,
+ NIPQUAD(local->srx.transport.sin.sin_addr),
+ ntohs(local->srx.transport.sin.sin_port));
+
+ _leave(" = %p [reuse]", local);
+ return local;
+}
+
+/*
+ * release a local endpoint
+ * - drops a reference; when the usage count reaches zero, the endpoint's
+ * destroyer work item is queued to do the actual teardown
+ */
+void rxrpc_put_local(struct rxrpc_local *local)
+{
+ _enter("%p{u=%d}", local, atomic_read(&local->usage));
+
+ ASSERTCMP(atomic_read(&local->usage), >, 0);
+
+ /* to prevent a race, the decrement and the dequeue must be effectively
+ * atomic */
+ write_lock_bh(&rxrpc_local_lock);
+ if (unlikely(atomic_dec_and_test(&local->usage))) {
+ _debug("destroy local");
+ rxrpc_queue_work(&local->destroyer);
+ }
+ write_unlock_bh(&rxrpc_local_lock);
+ _leave("");
+}
+
+/*
+ * destroy a local endpoint
+ * - this is the handler for the destroyer work item that rxrpc_put_local()
+ *   queues when the usage count reaches zero
+ * - if the endpoint has been resurrected (its usage count is above zero again
+ *   by the time the locks are held), it is left alone
+ * - otherwise it is unlinked from rxrpc_locals, its queues are purged, its
+ *   socket is shut down and released and the record is freed
+ * - anyone waiting on rxrpc_local_wq is woken once the last endpoint has gone
+ */
+static void rxrpc_destroy_local(struct work_struct *work)
+{
+	struct rxrpc_local *local =
+		container_of(work, struct rxrpc_local, destroyer);
+
+	_enter("%p{%d}", local, atomic_read(&local->usage));
+
+	down_write(&rxrpc_local_sem);
+
+	write_lock_bh(&rxrpc_local_lock);
+	if (atomic_read(&local->usage) > 0) {
+		write_unlock_bh(&rxrpc_local_lock);
+		/* the semaphore is still held for writing at this point, so it
+		 * must be released with up_write() rather than up_read() */
+		up_write(&rxrpc_local_sem);
+		_leave(" [resurrected]");
+		return;
+	}
+
+	list_del(&local->link);
+	local->socket->sk->sk_user_data = NULL;
+	write_unlock_bh(&rxrpc_local_lock);
+
+	/* the endpoint can no longer be found, so lookups need only be held
+	 * off in shared mode from here on; the matching release further down
+	 * is therefore up_read() */
+	downgrade_write(&rxrpc_local_sem);
+
+	ASSERT(list_empty(&local->services));
+	ASSERT(!work_pending(&local->acceptor));
+	ASSERT(!work_pending(&local->rejecter));
+
+	/* finish cleaning up the local descriptor */
+	rxrpc_purge_queue(&local->accept_queue);
+	rxrpc_purge_queue(&local->reject_queue);
+	local->socket->ops->shutdown(local->socket, 2);
+	sock_release(local->socket);
+
+	up_read(&rxrpc_local_sem);
+
+	_net("DESTROY LOCAL %d", local->debug_id);
+	kfree(local);
+
+	if (list_empty(&rxrpc_locals))
+		wake_up_all(&rxrpc_local_wq);
+
+	_leave("");
+}
+
+/*
+ * preemptively destroy all local endpoints rather than waiting for them to
+ * be destroyed
+ * - as things stand, this just sleeps uninterruptibly until rxrpc_locals is
+ * empty, being woken through rxrpc_local_wq by rxrpc_destroy_local()
+ */
+void __exit rxrpc_destroy_all_locals(void)
+{
+ DECLARE_WAITQUEUE(myself,current);
+
+ _enter("");
+
+ /* we simply have to wait for them to go away */
+ if (!list_empty(&rxrpc_locals)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&rxrpc_local_wq, &myself);
+
+ while (!list_empty(&rxrpc_locals)) {
+ schedule();
+ set_current_state(TASK_UNINTERRUPTIBLE); /* before rechecking the list */
+ }
+
+ remove_wait_queue(&rxrpc_local_wq, &myself);
+ set_current_state(TASK_RUNNING);
+ }
+
+ _leave("");
+}
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
new file mode 100644
index 000000000000..591c4422205e
--- /dev/null
+++ b/net/rxrpc/ar-output.c
@@ -0,0 +1,734 @@
+/* RxRPC packet transmission
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/circ_buf.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+int rxrpc_resend_timeout = 4; /* seconds from queueing a packet to its resend time */
+
+static int rxrpc_send_data(struct kiocb *iocb,
+ struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct msghdr *msg, size_t len);
+
+/*
+ * extract control messages from the sendmsg() control buffer
+ * - *command starts out as RXRPC_CMD_SEND_DATA and is changed by an
+ * RXRPC_ABORT or RXRPC_ACCEPT control message, of which only one may be
+ * given
+ * - RXRPC_USER_CALL_ID sets *user_call_ID and RXRPC_ABORT sets *abort_code,
+ * which must not be zero
+ * - control messages that are not at the SOL_RXRPC level are skipped
+ * - returns 0 on success, -EINVAL if there is no control data or if it is
+ * malformed, or -EISCONN if RXRPC_ACCEPT is given when server is false
+ */
+static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+ unsigned long *user_call_ID,
+ enum rxrpc_command *command,
+ u32 *abort_code,
+ bool server)
+{
+ struct cmsghdr *cmsg;
+ int len;
+
+ *command = RXRPC_CMD_SEND_DATA;
+
+ if (msg->msg_controllen == 0)
+ return -EINVAL;
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); /* payload size */
+ _debug("CMSG %d, %d, %d",
+ cmsg->cmsg_level, cmsg->cmsg_type, len);
+
+ if (cmsg->cmsg_level != SOL_RXRPC)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case RXRPC_USER_CALL_ID:
+ if (msg->msg_flags & MSG_CMSG_COMPAT) { /* the ID is a u32 in this case */
+ if (len != sizeof(u32))
+ return -EINVAL;
+ *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
+ } else {
+ if (len != sizeof(unsigned long))
+ return -EINVAL;
+ *user_call_ID = *(unsigned long *)
+ CMSG_DATA(cmsg);
+ }
+ _debug("User Call ID %lx", *user_call_ID);
+ break;
+
+ case RXRPC_ABORT:
+ if (*command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
+ *command = RXRPC_CMD_SEND_ABORT;
+ if (len != sizeof(*abort_code))
+ return -EINVAL;
+ *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
+ _debug("Abort %x", *abort_code);
+ if (*abort_code == 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_ACCEPT:
+ if (*command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
+ *command = RXRPC_CMD_ACCEPT;
+ if (len != 0)
+ return -EINVAL;
+ if (!server)
+ return -EISCONN;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * abort a call, sending an ABORT packet to the peer
+ * - if the call's state is no later than RXRPC_CALL_COMPLETE, the call is
+ * marked as locally aborted, the abort code is recorded, the resend and ACK
+ * timers are cancelled along with their events and the call is queued with
+ * the abort event set
+ * - otherwise nothing is done
+ */
+static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
+{
+ write_lock_bh(&call->state_lock);
+
+ if (call->state <= RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = abort_code;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ del_timer_sync(&call->resend_timer);
+ del_timer_sync(&call->ack_timer);
+ clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+ clear_bit(RXRPC_CALL_ACK, &call->events);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ rxrpc_queue_call(call);
+ }
+
+ write_unlock_bh(&call->state_lock);
+}
+
+/*
+ * send a message forming part of a client call through an RxRPC socket
+ * - caller holds the socket locked
+ * - the socket may be either a client socket or a server socket
+ * - the control messages select the call by user call ID and say whether to
+ * send data or to abort the call
+ * - the service ID comes from the address in msg->msg_name if there is one,
+ * otherwise from the socket
+ * - returns the result of rxrpc_send_data() when sending data, 0 after an
+ * abort, -ESHUTDOWN if the call has already completed, -EPROTO if the call
+ * is no longer in the request-sending state, -EINVAL for any other command,
+ * or an error from control message parsing or bundle/call lookup
+ */
+int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
+ struct rxrpc_transport *trans, struct msghdr *msg,
+ size_t len)
+{
+ struct rxrpc_conn_bundle *bundle;
+ enum rxrpc_command cmd;
+ struct rxrpc_call *call;
+ unsigned long user_call_ID = 0;
+ struct key *key;
+ __be16 service_id;
+ u32 abort_code = 0;
+ int ret;
+
+ _enter("");
+
+ ASSERT(trans != NULL);
+
+ ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
+ false);
+ if (ret < 0)
+ return ret;
+
+ bundle = NULL;
+ if (trans) {
+ service_id = rx->service_id;
+ if (msg->msg_name) {
+ struct sockaddr_rxrpc *srx =
+ (struct sockaddr_rxrpc *) msg->msg_name;
+ service_id = htons(srx->srx_service);
+ }
+ key = rx->key;
+ if (key && !rx->key->payload.data) /* a key with no payload is not passed on */
+ key = NULL;
+ bundle = rxrpc_get_bundle(rx, trans, key, service_id,
+ GFP_KERNEL);
+ if (IS_ERR(bundle))
+ return PTR_ERR(bundle);
+ }
+
+ call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID,
+ abort_code == 0, GFP_KERNEL);
+ if (trans)
+ rxrpc_put_bundle(trans, bundle);
+ if (IS_ERR(call)) {
+ _leave(" = %ld", PTR_ERR(call));
+ return PTR_ERR(call);
+ }
+
+ _debug("CALL %d USR %lx ST %d on CONN %p",
+ call->debug_id, call->user_call_ID, call->state, call->conn);
+
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ /* it's too late for this call */
+ ret = -ESHUTDOWN;
+ } else if (cmd == RXRPC_CMD_SEND_ABORT) {
+ rxrpc_send_abort(call, abort_code); /* ret is still 0 from the cmsg parse */
+ } else if (cmd != RXRPC_CMD_SEND_DATA) {
+ ret = -EINVAL;
+ } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+ /* request phase complete for this client call */
+ ret = -EPROTO;
+ } else {
+ ret = rxrpc_send_data(iocb, rx, call, msg, len);
+ }
+
+ rxrpc_put_call(call);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/**
+ * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
+ * @call: The call to send data through
+ * @msg: The data to send
+ * @len: The amount of data to send
+ *
+ * Allow a kernel service to send data on a call. The call must be in a state
+ * appropriate to sending data. No control data should be supplied in @msg,
+ * nor should an address be supplied. MSG_MORE should be flagged if there's
+ * more data to come, otherwise this data will end the transmission phase.
+ *
+ * The call's socket is locked for the duration. Returns the result of
+ * rxrpc_send_data(), -ESHUTDOWN if the call has already completed or -EPROTO
+ * if the call is not in a state in which data may be sent.
+ */
+int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
+ size_t len)
+{
+ int ret;
+
+ _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+
+ ASSERTCMP(msg->msg_name, ==, NULL);
+ ASSERTCMP(msg->msg_control, ==, NULL);
+
+ lock_sock(&call->socket->sk);
+
+ _debug("CALL %d USR %lx ST %d on CONN %p",
+ call->debug_id, call->user_call_ID, call->state, call->conn);
+
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ ret = -ESHUTDOWN; /* it's too late for this call */
+ } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ ret = -EPROTO; /* request phase complete for this client call */
+ } else {
+ mm_segment_t oldfs = get_fs();
+ set_fs(KERNEL_DS); /* the data to be copied is in kernel space */
+ ret = rxrpc_send_data(NULL, call->socket, call, msg, len);
+ set_fs(oldfs);
+ }
+
+ release_sock(&call->socket->sk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_send_data);
+
+/**
+ * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
+ * @call: The call to be aborted
+ * @abort_code: The abort code to stick into the ABORT packet
+ *
+ * Allow a kernel service to abort a call, if it's still in an abortable state.
+ * The call's socket is locked for the duration and nothing is done if the
+ * call has already completed.
+ */
+void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
+{
+ _enter("{%d},%d", call->debug_id, abort_code);
+
+ lock_sock(&call->socket->sk);
+
+ _debug("CALL %d USR %lx ST %d on CONN %p",
+ call->debug_id, call->user_call_ID, call->state, call->conn);
+
+ if (call->state < RXRPC_CALL_COMPLETE)
+ rxrpc_send_abort(call, abort_code);
+
+ release_sock(&call->socket->sk);
+ _leave("");
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_abort_call);
+
+/*
+ * send a message through a server socket
+ * - caller holds the socket locked
+ * - the control messages select the call by user call ID and say whether to
+ * accept an incoming call, send data on a call or abort a call
+ * - returns 0 after an accept or an abort, the result of rxrpc_send_data()
+ * when sending data, -EBADSLT if no call has the given ID, -ESHUTDOWN if the
+ * call has already completed, -EPROTO if the call is not in a state in which
+ * data may be sent, or an error from control message parsing or from
+ * accepting the call
+ */
+int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
+ struct msghdr *msg, size_t len)
+{
+ enum rxrpc_command cmd;
+ struct rxrpc_call *call;
+ unsigned long user_call_ID = 0;
+ u32 abort_code = 0;
+ int ret;
+
+ _enter("");
+
+ ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
+ true);
+ if (ret < 0)
+ return ret;
+
+ if (cmd == RXRPC_CMD_ACCEPT) {
+ call = rxrpc_accept_call(rx, user_call_ID);
+ if (IS_ERR(call))
+ return PTR_ERR(call);
+ rxrpc_put_call(call);
+ return 0;
+ }
+
+ call = rxrpc_find_server_call(rx, user_call_ID);
+ if (!call)
+ return -EBADSLT;
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ ret = -ESHUTDOWN;
+ goto out;
+ }
+
+ switch (cmd) {
+ case RXRPC_CMD_SEND_DATA:
+ if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ /* Tx phase not yet begun for this call */
+ ret = -EPROTO;
+ break;
+ }
+
+ ret = rxrpc_send_data(iocb, rx, call, msg, len);
+ break;
+
+ case RXRPC_CMD_SEND_ABORT:
+ rxrpc_send_abort(call, abort_code); /* ret is still 0 from the cmsg parse */
+ break;
+ default:
+ BUG();
+ }
+
+ out:
+ rxrpc_put_call(call);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * send a packet through the transport endpoint
+ * - the packet is sent from skb->head for skb->len bytes to the transport's
+ * peer through the local endpoint's UDP socket
+ * - if the packet is thought small enough, it is first tried with the don't
+ * fragment bit set under a read lock on the endpoint's defrag_sem; if that
+ * gives -EMSGSIZE or the packet was too big in the first place, the socket
+ * is switched to IP_PMTUDISC_DONT under a write lock, the packet is sent and
+ * the socket is switched back to IP_PMTUDISC_DO
+ * - returns the result of kernel_sendmsg(), or the error from the setsockopt
+ * if fragmentation could not be enabled
+ */
+int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
+{
+ struct kvec iov[1];
+ struct msghdr msg;
+ int ret, opt;
+
+ _enter(",{%d}", skb->len);
+
+ iov[0].iov_base = skb->head;
+ iov[0].iov_len = skb->len;
+
+ msg.msg_name = &trans->peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(trans->peer->srx.transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ /* send the packet with the don't fragment bit set if we currently
+ * think it's small enough */
+ if (skb->len - sizeof(struct rxrpc_header) < trans->peer->maxdata) {
+ down_read(&trans->local->defrag_sem);
+ /* send the packet by UDP
+ * - returns -EMSGSIZE if UDP would have to fragment the packet
+ * to go out of the interface
+ * - in which case, we'll have processed the ICMP error
+ * message and update the peer record
+ */
+ ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
+ iov[0].iov_len);
+
+ up_read(&trans->local->defrag_sem);
+ if (ret == -EMSGSIZE)
+ goto send_fragmentable;
+
+ _leave(" = %d [%u]", ret, trans->peer->maxdata);
+ return ret;
+ }
+
+send_fragmentable:
+ /* attempt to send this message with fragmentation enabled */
+ _debug("send fragment");
+
+ down_write(&trans->local->defrag_sem); /* excludes the don't-fragment senders above */
+ opt = IP_PMTUDISC_DONT;
+ ret = kernel_setsockopt(trans->local->socket, SOL_IP, IP_MTU_DISCOVER,
+ (char *) &opt, sizeof(opt));
+ if (ret == 0) {
+ ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
+ iov[0].iov_len);
+
+ opt = IP_PMTUDISC_DO;
+ kernel_setsockopt(trans->local->socket, SOL_IP,
+ IP_MTU_DISCOVER, (char *) &opt, sizeof(opt)); /* result not checked */
+ }
+
+ up_write(&trans->local->defrag_sem);
+ _leave(" = %d [frag %u]", ret, trans->peer->maxdata);
+ return ret;
+}
+
+/*
+ * wait for space to appear in the transmit/ACK window
+ * - caller holds the socket locked
+ * - the socket lock is dropped whilst sleeping and retaken afterwards
+ * - *timeo is updated with whatever schedule_timeout() hands back
+ * - returns 0 once there is space in the window, or the error that
+ * sock_intr_errno() gives if a signal is pending first
+ */
+static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ DECLARE_WAITQUEUE(myself, current);
+ int ret;
+
+ _enter(",{%d},%ld",
+ CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz),
+ *timeo);
+
+ add_wait_queue(&call->tx_waitq, &myself);
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE); /* before testing the condition */
+ ret = 0;
+ if (CIRC_SPACE(call->acks_head, call->acks_tail,
+ call->acks_winsz) > 0)
+ break;
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(*timeo);
+ break;
+ }
+
+ release_sock(&rx->sk);
+ *timeo = schedule_timeout(*timeo);
+ lock_sock(&rx->sk);
+ }
+
+ remove_wait_queue(&call->tx_waitq, &myself);
+ set_current_state(TASK_RUNNING);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * attempt to schedule an instant Tx resend
+ * - if the resend timer can be cancelled, the resend timer event is raised by
+ * hand and the call is queued, provided that the call has not completed and
+ * that the event was not already set
+ * - nothing is done if try_to_del_timer_sync() fails
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call)
+{
+ read_lock_bh(&call->state_lock);
+ if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+ rxrpc_queue_call(call);
+ }
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * queue a packet for transmission, set the resend timer and attempt
+ * to send the packet immediately
+ * - the packet is placed in the call's ACK window at acks_head, which is then
+ * advanced
+ * - if this is the last packet, or if the call is still waiting to ACK the
+ * request, the call's state is moved on
+ * - the packet is only transmitted straight away if the ACK timer could be
+ * cancelled; if it is not transmitted or transmission fails, it is marked as
+ * needing a resend and an instant resend is requested
+ */
+static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
+ bool last)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ int ret;
+
+ _net("queue skb %p [%d]", skb, call->acks_head);
+
+ ASSERT(call->acks_window != NULL);
+ call->acks_window[call->acks_head] = (unsigned long) skb;
+ smp_wmb(); /* order the slot write before the acks_head update */
+ call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);
+
+ if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
+ _debug("________awaiting reply/ACK__________");
+ write_lock_bh(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ break;
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ call->state = RXRPC_CALL_SERVER_SEND_REPLY;
+ if (!last)
+ break; /* otherwise fall through as the reply is complete too */
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+ break;
+ default:
+ break;
+ }
+ write_unlock_bh(&call->state_lock);
+ }
+
+ _proto("Tx DATA %%%u { #%u }",
+ ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+
+ sp->need_resend = 0;
+ sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
+ if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { /* timer not yet running */
+ _debug("run timer");
+ call->resend_timer.expires = sp->resend_at;
+ add_timer(&call->resend_timer);
+ }
+
+ /* attempt to cancel the rx-ACK timer, deferring reply transmission if
+ * we're ACK'ing the request phase of an incoming call */
+ ret = -EAGAIN;
+ if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
+ /* the packet may be freed by rxrpc_process_call() before this
+ * returns */
+ ret = rxrpc_send_packet(call->conn->trans, skb);
+ _net("sent skb %p", skb);
+ } else {
+ _debug("failed to delete ACK timer");
+ }
+
+ if (ret < 0) {
+ _debug("need instant resend %d", ret);
+ sp->need_resend = 1;
+ rxrpc_instant_resend(call);
+ }
+
+ _leave("");
+}
+
+/*
+ * send data through a socket
+ * - must be called in process context
+ * - caller holds the socket locked
+ * - data is copied from the message's iovecs into packet buffers whose size
+ * is derived from the peer's maxdata less the connection's security
+ * overhead; each buffer is given a header, passed through
+ * rxrpc_secure_packet() and queued for transmission when it is full or when
+ * the data runs out and MSG_MORE is not set
+ * - a partially filled buffer is parked in call->tx_pending and picked up
+ * again by the next invocation
+ * - if there is no room in the Tx window, this waits unless MSG_DONTWAIT is
+ * set, in which case -EAGAIN is given
+ * - NOTE(review): copied is initialised to 0 and never advanced, so the
+ * "if (copied)" test under maybe_error can never be true as the code stands
+ */
+static int rxrpc_send_data(struct kiocb *iocb,
+ struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct msghdr *msg, size_t len)
+{
+ struct rxrpc_skb_priv *sp;
+ unsigned char __user *from;
+ struct sk_buff *skb;
+ struct iovec *iov;
+ struct sock *sk = &rx->sk;
+ long timeo;
+ bool more;
+ int ret, ioc, segment, copied;
+
+ _enter(",,,{%zu},%zu", msg->msg_iovlen, len);
+
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+ /* this should be in poll */
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+ if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+ return -EPIPE;
+
+ iov = msg->msg_iov;
+ ioc = msg->msg_iovlen - 1; /* iovecs left after the first one */
+ from = iov->iov_base;
+ segment = iov->iov_len;
+ iov++;
+ more = msg->msg_flags & MSG_MORE;
+
+ skb = call->tx_pending; /* resume any partially filled packet */
+ call->tx_pending = NULL;
+
+ copied = 0;
+ do {
+ int copy;
+
+ if (segment > len)
+ segment = len;
+
+ _debug("SEGMENT %d @%p", segment, from);
+
+ if (!skb) {
+ size_t size, chunk, max, space;
+
+ _debug("alloc");
+
+ if (CIRC_SPACE(call->acks_head, call->acks_tail,
+ call->acks_winsz) <= 0) {
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ ret = rxrpc_wait_for_tx_window(rx, call,
+ &timeo);
+ if (ret < 0)
+ goto maybe_error;
+ }
+
+ max = call->conn->trans->peer->maxdata;
+ max -= call->conn->security_size;
+ max &= ~(call->conn->size_align - 1UL); /* round down to the alignment */
+
+ chunk = max;
+ if (chunk > len && !more)
+ chunk = len;
+
+ space = chunk + call->conn->size_align;
+ space &= ~(call->conn->size_align - 1UL);
+
+ size = space + call->conn->header_size;
+
+ _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
+
+ /* create a buffer that we can retain until it's ACK'd */
+ skb = sock_alloc_send_skb(
+ sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
+ if (!skb)
+ goto maybe_error;
+
+ rxrpc_new_skb(skb);
+
+ _debug("ALLOC SEND %p", skb);
+
+ ASSERTCMP(skb->mark, ==, 0);
+
+ _debug("HS: %u", call->conn->header_size);
+ skb_reserve(skb, call->conn->header_size);
+ skb->len += call->conn->header_size;
+
+ sp = rxrpc_skb(skb);
+ sp->remain = chunk;
+ if (sp->remain > skb_tailroom(skb))
+ sp->remain = skb_tailroom(skb);
+
+ _net("skb: hr %d, tr %d, hl %d, rm %d",
+ skb_headroom(skb),
+ skb_tailroom(skb),
+ skb_headlen(skb),
+ sp->remain);
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
+ _debug("append");
+ sp = rxrpc_skb(skb);
+
+ /* append next segment of data to the current buffer */
+ copy = skb_tailroom(skb);
+ ASSERTCMP(copy, >, 0);
+ if (copy > segment)
+ copy = segment;
+ if (copy > sp->remain)
+ copy = sp->remain;
+
+ _debug("add");
+ ret = skb_add_data(skb, from, copy);
+ _debug("added");
+ if (ret < 0)
+ goto efault;
+ sp->remain -= copy;
+ skb->mark += copy; /* skb->mark totals the data bytes added to this packet */
+
+ len -= copy;
+ segment -= copy;
+ from += copy;
+ while (segment == 0 && ioc > 0) { /* move on to the next non-empty iovec */
+ from = iov->iov_base;
+ segment = iov->iov_len;
+ iov++;
+ ioc--;
+ }
+ if (len == 0) {
+ segment = 0;
+ ioc = 0;
+ }
+
+ /* check for the far side aborting the call or a network error
+ * occurring */
+ if (call->state > RXRPC_CALL_COMPLETE)
+ goto call_aborted;
+
+ /* add the packet to the send queue if it's now full */
+ if (sp->remain <= 0 || (segment == 0 && !more)) {
+ struct rxrpc_connection *conn = call->conn;
+ size_t pad;
+
+ /* pad out if we're using security */
+ if (conn->security) {
+ pad = conn->security_size + skb->mark;
+ pad = conn->size_align - pad;
+ pad &= conn->size_align - 1;
+ _debug("pad %zu", pad);
+ if (pad)
+ memset(skb_put(skb, pad), 0, pad);
+ }
+
+ sp->hdr.epoch = conn->epoch;
+ sp->hdr.cid = call->cid;
+ sp->hdr.callNumber = call->call_id;
+ sp->hdr.seq =
+ htonl(atomic_inc_return(&call->sequence));
+ sp->hdr.serial =
+ htonl(atomic_inc_return(&conn->serial));
+ sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
+ sp->hdr.userStatus = 0;
+ sp->hdr.securityIndex = conn->security_ix;
+ sp->hdr._rsvd = 0;
+ sp->hdr.serviceId = conn->service_id;
+
+ sp->hdr.flags = conn->out_clientflag;
+ if (len == 0 && !more)
+ sp->hdr.flags |= RXRPC_LAST_PACKET;
+ else if (CIRC_SPACE(call->acks_head, call->acks_tail,
+ call->acks_winsz) > 1)
+ sp->hdr.flags |= RXRPC_MORE_PACKETS;
+
+ ret = rxrpc_secure_packet(
+ call, skb, skb->mark,
+ skb->head + sizeof(struct rxrpc_header));
+ if (ret < 0)
+ goto out;
+
+ memcpy(skb->head, &sp->hdr,
+ sizeof(struct rxrpc_header)); /* the header sits at the front of the buffer */
+ rxrpc_queue_packet(call, skb, segment == 0 && !more);
+ skb = NULL; /* now owned by the Tx window */
+ }
+
+ } while (segment > 0);
+
+out:
+ call->tx_pending = skb; /* NULL unless a packet is still being filled */
+ _leave(" = %d", ret);
+ return ret;
+
+call_aborted:
+ rxrpc_free_skb(skb);
+ if (call->state == RXRPC_CALL_NETWORK_ERROR)
+ ret = call->conn->trans->peer->net_error;
+ else
+ ret = -ECONNABORTED;
+ _leave(" = %d", ret);
+ return ret;
+
+maybe_error:
+ if (copied)
+ ret = copied;
+ goto out;
+
+efault:
+ ret = -EFAULT;
+ goto out;
+}
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
new file mode 100644
index 000000000000..ce08b78647ce
--- /dev/null
+++ b/net/rxrpc/ar-peer.c
@@ -0,0 +1,316 @@
+/* RxRPC remote transport endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_peers); /* all the peer records currently in existence */
+static DEFINE_RWLOCK(rxrpc_peer_lock); /* taken around accesses to rxrpc_peers */
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq); /* woken when rxrpc_peers becomes empty */
+
+static void rxrpc_destroy_peer(struct work_struct *work); /* handler for peer->destroyer */
+
+/*
+ * assess the MTU size for the network interface through which this peer is
+ * reached
+ * - the interface MTU starts out at a default of 1500 and is replaced by the
+ *   MTU of the route to the peer if the route lookup succeeds
+ * - if the route lookup fails, the default is left in place
+ * - only AF_INET peers are handled; any other address family hits BUG()
+ */
+static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
+{
+ struct rtable *rt;
+ struct flowi fl;
+ int ret;
+
+ peer->if_mtu = 1500;
+
+ memset(&fl, 0, sizeof(fl));
+
+ switch (peer->srx.transport.family) {
+ case AF_INET:
+ fl.oif = 0;
+ /* this statement used to end in a comma, which fused it with the
+ * next assignment by way of the comma operator */
+ fl.proto = IPPROTO_UDP;
+ fl.nl_u.ip4_u.saddr = 0;
+ fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr;
+ fl.nl_u.ip4_u.tos = 0;
+ /* assume AFS.CM talking to AFS.FS */
+ fl.uli_u.ports.sport = htons(7001);
+ fl.uli_u.ports.dport = htons(7000);
+ break;
+ default:
+ BUG();
+ }
+
+ ret = ip_route_output_key(&rt, &fl);
+ if (ret < 0) {
+ kleave(" [route err %d]", ret);
+ return;
+ }
+
+ /* pick up the route's MTU and drop the route again */
+ peer->if_mtu = dst_mtu(&rt->u.dst);
+ dst_release(&rt->u.dst);
+
+ kleave(" [if_mtu %u]", peer->if_mtu);
+}
+
+/*
+ * allocate a new peer
+ * - the record is zeroed, given a usage count of 1, a debug ID and a copy of
+ *   the address in @srx
+ * - the MTU is assessed and the header size and maximum data size are derived
+ *   from it
+ * - only AF_INET addresses with a SOCK_DGRAM transport are handled; anything
+ *   else hits BUG()
+ * - returns the new record, or NULL if the allocation fails
+ */
+static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ struct rxrpc_peer *peer;
+
+ _enter("");
+
+ peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
+ if (peer) {
+ INIT_WORK(&peer->destroyer, &rxrpc_destroy_peer);
+ INIT_LIST_HEAD(&peer->link);
+ INIT_LIST_HEAD(&peer->error_targets);
+ spin_lock_init(&peer->lock);
+ atomic_set(&peer->usage, 1); /* the caller's ref */
+ peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ memcpy(&peer->srx, srx, sizeof(*srx));
+
+ rxrpc_assess_MTU_size(peer);
+ peer->mtu = peer->if_mtu;
+
+ if (srx->transport.family == AF_INET) {
+ peer->hdrsize = sizeof(struct iphdr);
+ switch (srx->transport_type) {
+ case SOCK_DGRAM:
+ peer->hdrsize += sizeof(struct udphdr);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ } else {
+ BUG();
+ }
+
+ peer->hdrsize += sizeof(struct rxrpc_header);
+ peer->maxdata = peer->mtu - peer->hdrsize; /* what's left of the MTU after the IP, UDP and RxRPC headers */
+ }
+
+ _leave(" = %p", peer);
+ return peer;
+}
+
+/*
+ * obtain a remote transport endpoint for the specified address
+ * - the peer list is searched under the read lock first; if there's no match,
+ *   a candidate record is allocated and the search is repeated under the
+ *   write lock before the candidate is added to the list
+ * - a record matches if it has a non-zero usage count and the same transport
+ *   type, transport length and transport address bytes as @srx
+ * - a ref is taken on a record found in the list; a newly added record keeps
+ *   the usage count of 1 it was allocated with
+ * - returns the peer record, or ERR_PTR(-ENOMEM) if a candidate couldn't be
+ *   allocated
+ */
+struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *srx, gfp_t gfp)
+{
+ struct rxrpc_peer *peer, *candidate;
+ const char *new = "old"; /* tag for the trace message below */
+ int usage; /* NOTE(review): assigned below but never read */
+
+ _enter("{%d,%d,%u.%u.%u.%u+%hu}",
+ srx->transport_type,
+ srx->transport_len,
+ NIPQUAD(srx->transport.sin.sin_addr),
+ ntohs(srx->transport.sin.sin_port));
+
+ /* search the peer list first */
+ read_lock_bh(&rxrpc_peer_lock);
+ list_for_each_entry(peer, &rxrpc_peers, link) {
+ _debug("check PEER %d { u=%d t=%d l=%d }",
+ peer->debug_id,
+ atomic_read(&peer->usage),
+ peer->srx.transport_type,
+ peer->srx.transport_len);
+
+ if (atomic_read(&peer->usage) > 0 &&
+ peer->srx.transport_type == srx->transport_type &&
+ peer->srx.transport_len == srx->transport_len &&
+ memcmp(&peer->srx.transport,
+ &srx->transport,
+ srx->transport_len) == 0)
+ goto found_extant_peer;
+ }
+ read_unlock_bh(&rxrpc_peer_lock);
+
+ /* not yet present - create a candidate for a new record and then
+ * redo the search */
+ candidate = rxrpc_alloc_peer(srx, gfp);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ write_lock_bh(&rxrpc_peer_lock);
+
+ list_for_each_entry(peer, &rxrpc_peers, link) {
+ if (atomic_read(&peer->usage) > 0 &&
+ peer->srx.transport_type == srx->transport_type &&
+ peer->srx.transport_len == srx->transport_len &&
+ memcmp(&peer->srx.transport,
+ &srx->transport,
+ srx->transport_len) == 0)
+ goto found_extant_second;
+ }
+
+ /* we can now add the new candidate to the list */
+ peer = candidate;
+ candidate = NULL;
+
+ list_add_tail(&peer->link, &rxrpc_peers);
+ write_unlock_bh(&rxrpc_peer_lock);
+ new = "new";
+
+success:
+ _net("PEER %s %d {%d,%u,%u.%u.%u.%u+%hu}",
+ new,
+ peer->debug_id,
+ peer->srx.transport_type,
+ peer->srx.transport.family,
+ NIPQUAD(peer->srx.transport.sin.sin_addr),
+ ntohs(peer->srx.transport.sin.sin_port));
+
+ _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ return peer;
+
+ /* we found the peer in the list immediately */
+found_extant_peer:
+ usage = atomic_inc_return(&peer->usage);
+ read_unlock_bh(&rxrpc_peer_lock);
+ goto success;
+
+ /* we found the peer on the second time through the list */
+found_extant_second:
+ usage = atomic_inc_return(&peer->usage);
+ write_unlock_bh(&rxrpc_peer_lock);
+ kfree(candidate); /* someone else added the record first, so ours is surplus */
+ goto success;
+}
+
+/*
+ * find the peer associated with a packet
+ * - only looks at peers reached by UDP over IPv4, and only if the local
+ *   endpoint is of that sort too
+ * - a record matches if it has a non-zero usage count and the same address
+ *   and port as those given (both in network byte order)
+ * - returns the peer with a ref taken on it, ERR_PTR(-EBUSY) if there's no
+ *   record of the peer, or ERR_PTR(-EAFNOSUPPORT) if the local endpoint isn't
+ *   UDP/IPv4
+ */
+struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *local,
+ __be32 addr, __be16 port)
+{
+ struct rxrpc_peer *peer;
+
+ _enter("");
+
+ /* search the peer list */
+ read_lock_bh(&rxrpc_peer_lock);
+
+ if (local->srx.transport.family == AF_INET &&
+ local->srx.transport_type == SOCK_DGRAM) {
+ list_for_each_entry(peer, &rxrpc_peers, link) {
+ if (atomic_read(&peer->usage) > 0 &&
+ peer->srx.transport_type == SOCK_DGRAM &&
+ peer->srx.transport.family == AF_INET &&
+ peer->srx.transport.sin.sin_port == port &&
+ peer->srx.transport.sin.sin_addr.s_addr == addr)
+ goto found_UDP_peer;
+ }
+
+ goto new_UDP_peer;
+ }
+
+ read_unlock_bh(&rxrpc_peer_lock);
+ _leave(" = -EAFNOSUPPORT");
+ return ERR_PTR(-EAFNOSUPPORT);
+
+found_UDP_peer:
+ _net("Rx UDP DGRAM from peer %d", peer->debug_id);
+ atomic_inc(&peer->usage);
+ read_unlock_bh(&rxrpc_peer_lock);
+ _leave(" = %p", peer);
+ return peer;
+
+new_UDP_peer:
+ /* the list walk ran off the end, so the cursor doesn't point at a
+ * real peer record here and mustn't be dereferenced */
+ _net("Rx UDP DGRAM from NEW peer");
+ read_unlock_bh(&rxrpc_peer_lock);
+ _leave(" = -EBUSY [new]");
+ return ERR_PTR(-EBUSY);
+}
+
+/*
+ * release a remote transport endpoint
+ * - drops one ref on the record; the usage count must be greater than zero
+ *   on entry
+ * - when the last ref is dropped, the destroyer work item is queued to
+ *   dispose of the record
+ */
+void rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+ _enter("%p{u=%d}", peer, atomic_read(&peer->usage));
+
+ ASSERTCMP(atomic_read(&peer->usage), >, 0);
+
+ if (likely(!atomic_dec_and_test(&peer->usage))) {
+ _leave(" [in use]");
+ return;
+ }
+
+ rxrpc_queue_work(&peer->destroyer); /* runs rxrpc_destroy_peer() */
+ _leave("");
+}
+
+/*
+ * destroy a remote transport endpoint
+ * - this is the handler for the work item queued by rxrpc_put_peer()
+ * - the record is unlinked from the peer list under the write lock and then
+ *   freed
+ * - anyone waiting on rxrpc_peer_wq is woken if the peer list is then empty
+ */
+static void rxrpc_destroy_peer(struct work_struct *work)
+{
+ struct rxrpc_peer *peer =
+ container_of(work, struct rxrpc_peer, destroyer);
+
+ _enter("%p{%d}", peer, atomic_read(&peer->usage));
+
+ write_lock_bh(&rxrpc_peer_lock);
+ list_del(&peer->link);
+ write_unlock_bh(&rxrpc_peer_lock);
+
+ _net("DESTROY PEER %d", peer->debug_id);
+ kfree(peer);
+
+ if (list_empty(&rxrpc_peers)) /* checked without holding rxrpc_peer_lock */
+ wake_up_all(&rxrpc_peer_wq);
+ _leave("");
+}
+
+/*
+ * wait for all the peer records to be destroyed
+ * - nothing is actually torn down here; this just sleeps uninterruptibly on
+ *   rxrpc_peer_wq until the peer list is seen to be empty
+ * - the wakeup comes from rxrpc_destroy_peer() when it empties the list
+ */
+void __exit rxrpc_destroy_all_peers(void)
+{
+ DECLARE_WAITQUEUE(myself,current);
+
+ _enter("");
+
+ /* we simply have to wait for them to go away */
+ if (!list_empty(&rxrpc_peers)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&rxrpc_peer_wq, &myself);
+
+ while (!list_empty(&rxrpc_peers)) {
+ schedule();
+ set_current_state(TASK_UNINTERRUPTIBLE); /* rearm before rechecking the list */
+ }
+
+ remove_wait_queue(&rxrpc_peer_wq, &myself);
+ set_current_state(TASK_RUNNING);
+ }
+
+ _leave("");
+}
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
new file mode 100644
index 000000000000..58f4b4e5cece
--- /dev/null
+++ b/net/rxrpc/ar-proc.c
@@ -0,0 +1,247 @@
+/* /proc/net/ support for AF_RXRPC
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static const char *rxrpc_conn_states[] = { /* connection state names, indexed by conn->state */
+ [RXRPC_CONN_UNUSED] = "Unused ",
+ [RXRPC_CONN_CLIENT] = "Client ",
+ [RXRPC_CONN_SERVER_UNSECURED] = "SvUnsec ",
+ [RXRPC_CONN_SERVER_CHALLENGING] = "SvChall ",
+ [RXRPC_CONN_SERVER] = "SvSecure",
+ [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
+ [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CONN_NETWORK_ERROR] = "NetError",
+};
+
+const char *rxrpc_call_states[] = { /* call state names, indexed by call->state; not static */
+ [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
+ [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
+ [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
+ [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK",
+ [RXRPC_CALL_SERVER_SECURING] = "SvSecure",
+ [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept",
+ [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq",
+ [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq",
+ [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl",
+ [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK",
+ [RXRPC_CALL_COMPLETE] = "Complete",
+ [RXRPC_CALL_SERVER_BUSY] = "SvBusy ",
+ [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort",
+ [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CALL_NETWORK_ERROR] = "NetError",
+ [RXRPC_CALL_DEAD] = "Dead ",
+};
+
+/*
+ * generate a list of extant and dead calls in /proc/net/rxrpc_calls
+ * - this is the seq_file start op: it takes rxrpc_call_lock for reading (the
+ *   stop op drops it again) and positions the cursor
+ * - position 0 yields SEQ_START_TOKEN, for which the show op emits the header
+ *   line; position N > 0 yields the Nth entry in the call list
+ * - returns NULL if the position is beyond the end of the list
+ */
+static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
+{
+ struct list_head *_p;
+ loff_t pos = *_pos;
+
+ read_lock(&rxrpc_call_lock);
+ if (!pos)
+ return SEQ_START_TOKEN;
+ pos--; /* discount the header line */
+
+ list_for_each(_p, &rxrpc_calls)
+ if (!pos--)
+ break;
+
+ return _p != &rxrpc_calls ? _p : NULL;
+}
+
+static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct list_head *_p;
+
+ (*pos)++;
+
+ _p = v;
+ _p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next; /* the first call follows the header token */
+
+ return _p != &rxrpc_calls ? _p : NULL; /* NULL once we're back at the list head */
+}
+
+static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
+{
+ read_unlock(&rxrpc_call_lock); /* taken in rxrpc_call_seq_start() */
+}
+
+static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_transport *trans;
+ struct rxrpc_call *call;
+ char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; /* room for "ddd.ddd.ddd.ddd:ppppp" and a NUL */
+
+ if (v == SEQ_START_TOKEN) { /* the header line */
+ seq_puts(seq,
+ "Proto Local Remote "
+ " SvID ConnID CallID End Use State Abort "
+ " UserID\n");
+ return 0;
+ }
+
+ call = list_entry(v, struct rxrpc_call, link);
+ trans = call->conn->trans;
+
+ sprintf(lbuff, NIPQUAD_FMT":%u",
+ NIPQUAD(trans->local->srx.transport.sin.sin_addr),
+ ntohs(trans->local->srx.transport.sin.sin_port));
+
+ sprintf(rbuff, NIPQUAD_FMT":%u",
+ NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
+ ntohs(trans->peer->srx.transport.sin.sin_port));
+
+ seq_printf(seq, /* one line per call */
+ "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
+ " %-8.8s %08x %lx\n",
+ lbuff,
+ rbuff,
+ ntohs(call->conn->service_id),
+ ntohl(call->conn->cid),
+ ntohl(call->call_id),
+ call->conn->in_clientflag ? "Svc" : "Clt",
+ atomic_read(&call->usage),
+ rxrpc_call_states[call->state],
+ call->abort_code,
+ call->user_call_ID);
+
+ return 0;
+}
+
+static struct seq_operations rxrpc_call_seq_ops = { /* iterator used by rxrpc_call_seq_open() */
+ .start = rxrpc_call_seq_start,
+ .next = rxrpc_call_seq_next,
+ .stop = rxrpc_call_seq_stop,
+ .show = rxrpc_call_seq_show,
+};
+
+static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &rxrpc_call_seq_ops); /* no private data is allocated */
+}
+
+/*
+ * file operations for the list of calls
+ * - the file is opened with seq_open(), which doesn't allocate any private
+ *   data, so the matching release op is seq_release() rather than
+ *   seq_release_private()
+ */
+struct file_operations rxrpc_call_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = rxrpc_call_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * generate a list of extant virtual connections in /proc/net/rxrpc_conns
+ * - this is the seq_file start op: it takes rxrpc_connection_lock for reading
+ *   (the stop op drops it again) and positions the cursor
+ * - position 0 yields SEQ_START_TOKEN, for which the show op emits the header
+ *   line; position N > 0 yields the Nth entry in the connection list
+ * - returns NULL if the position is beyond the end of the list
+ */
+static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
+{
+ struct list_head *_p;
+ loff_t pos = *_pos;
+
+ read_lock(&rxrpc_connection_lock);
+ if (!pos)
+ return SEQ_START_TOKEN;
+ pos--; /* discount the header line */
+
+ list_for_each(_p, &rxrpc_connections)
+ if (!pos--)
+ break;
+
+ return _p != &rxrpc_connections ? _p : NULL;
+}
+
+static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct list_head *_p;
+
+ (*pos)++;
+
+ _p = v;
+ _p = (v == SEQ_START_TOKEN) ? rxrpc_connections.next : _p->next; /* the first connection follows the header token */
+
+ return _p != &rxrpc_connections ? _p : NULL; /* NULL once we're back at the list head */
+}
+
+static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
+{
+ read_unlock(&rxrpc_connection_lock); /* taken in rxrpc_connection_seq_start() */
+}
+
+static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_transport *trans;
+ char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; /* room for "ddd.ddd.ddd.ddd:ppppp" and a NUL */
+
+ if (v == SEQ_START_TOKEN) { /* the header line */
+ seq_puts(seq,
+ "Proto Local Remote "
+ " SvID ConnID Calls End Use State Key "
+ " Serial ISerial\n"
+ );
+ return 0;
+ }
+
+ conn = list_entry(v, struct rxrpc_connection, link);
+ trans = conn->trans;
+
+ sprintf(lbuff, NIPQUAD_FMT":%u",
+ NIPQUAD(trans->local->srx.transport.sin.sin_addr),
+ ntohs(trans->local->srx.transport.sin.sin_port));
+
+ sprintf(rbuff, NIPQUAD_FMT":%u",
+ NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
+ ntohs(trans->peer->srx.transport.sin.sin_port));
+
+ seq_printf(seq, /* one line per connection */
+ "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
+ " %s %08x %08x %08x\n",
+ lbuff,
+ rbuff,
+ ntohs(conn->service_id),
+ ntohl(conn->cid),
+ conn->call_counter,
+ conn->in_clientflag ? "Svc" : "Clt",
+ atomic_read(&conn->usage),
+ rxrpc_conn_states[conn->state],
+ key_serial(conn->key),
+ atomic_read(&conn->serial),
+ atomic_read(&conn->hi_serial));
+
+ return 0;
+}
+
+static struct seq_operations rxrpc_connection_seq_ops = { /* iterator used by rxrpc_connection_seq_open() */
+ .start = rxrpc_connection_seq_start,
+ .next = rxrpc_connection_seq_next,
+ .stop = rxrpc_connection_seq_stop,
+ .show = rxrpc_connection_seq_show,
+};
+
+
+static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &rxrpc_connection_seq_ops); /* no private data is allocated */
+}
+
+/*
+ * file operations for the list of connections
+ * - the file is opened with seq_open(), which doesn't allocate any private
+ *   data, so the matching release op is seq_release() rather than
+ *   seq_release_private()
+ */
+struct file_operations rxrpc_connection_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = rxrpc_connection_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
new file mode 100644
index 000000000000..f19121d4795b
--- /dev/null
+++ b/net/rxrpc/ar-recvmsg.c
@@ -0,0 +1,437 @@
+/* RxRPC recvmsg() implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * remove a call's user ID from the socket tree to make the user ID available
+ * again and so that it won't be seen again in association with that call
+ * - the call is only removed from the tree if it's flagged as having a user
+ *   ID, and the flag is cleared at the same time
+ * - the call is then queued with the RELEASE event set unless it's already
+ *   flagged as released or the event was already pending
+ */
+void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+ _debug("RELEASE CALL %d", call->debug_id);
+
+ if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ write_lock_bh(&rx->call_lock);
+ rb_erase(&call->sock_node, &call->socket->calls);
+ clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+ write_unlock_bh(&rx->call_lock);
+ }
+
+ read_lock_bh(&call->state_lock);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * receive a message from an RxRPC socket
+ * - we need to be careful about two or more threads calling recvmsg
+ *   simultaneously
+ * - data packets belonging to a single call are copied out until the buffer
+ *   is full, the last packet of the phase is seen or a packet that doesn't
+ *   continue the same call's data turns up
+ * - non-data messages (new call, final ACK, busy, abort, network and local
+ *   errors) are passed as control messages
+ * - MSG_MORE is left set in msg_flags unless the end of a phase is reached,
+ *   and MSG_EOR is set when a terminal message is delivered
+ * - MSG_OOB and MSG_TRUNC are rejected with -EOPNOTSUPP
+ * - returns the amount of data copied or a negative error code; -ENODATA
+ *   indicates a non-listening socket with no calls outstanding
+ */
+int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *msg, size_t len, int flags)
+{
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_call *call = NULL, *continue_call = NULL;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ struct sk_buff *skb;
+ long timeo;
+ int copy, ret, ullen, offset, copied = 0;
+ u32 abort_code;
+
+ DEFINE_WAIT(wait);
+
+ _enter(",,,%zu,%d", len, flags);
+
+ if (flags & (MSG_OOB | MSG_TRUNC))
+ return -EOPNOTSUPP;
+
+ /* the user call ID is passed as 4 bytes to compat callers */
+ ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long);
+
+ timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
+ msg->msg_flags |= MSG_MORE;
+
+ lock_sock(&rx->sk);
+
+ for (;;) {
+ /* return immediately if a client socket has no outstanding
+ * calls */
+ if (RB_EMPTY_ROOT(&rx->calls)) {
+ if (copied)
+ goto out;
+ if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
+ release_sock(&rx->sk);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ return -ENODATA;
+ }
+ }
+
+ /* get the next message on the Rx queue */
+ skb = skb_peek(&rx->sk.sk_receive_queue);
+ if (!skb) {
+ /* nothing remains on the queue */
+ if (copied &&
+ (msg->msg_flags & MSG_PEEK || timeo == 0))
+ goto out;
+
+ /* wait for a message to turn up */
+ release_sock(&rx->sk);
+ prepare_to_wait_exclusive(rx->sk.sk_sleep, &wait,
+ TASK_INTERRUPTIBLE);
+ ret = sock_error(&rx->sk);
+ if (ret)
+ goto wait_error;
+
+ if (skb_queue_empty(&rx->sk.sk_receive_queue)) {
+ if (signal_pending(current))
+ goto wait_interrupted;
+ timeo = schedule_timeout(timeo);
+ }
+ finish_wait(rx->sk.sk_sleep, &wait);
+ lock_sock(&rx->sk);
+ continue;
+ }
+
+ peek_next_packet:
+ sp = rxrpc_skb(skb);
+ call = sp->call;
+ ASSERT(call != NULL);
+
+ _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]);
+
+ /* make sure we wait for the state to be updated in this call */
+ spin_lock_bh(&call->lock);
+ spin_unlock_bh(&call->lock);
+
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ _debug("packet from released call");
+ if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ /* we haven't taken a ref on this call, so we mustn't
+ * leave the pointer lying around for the exit path to
+ * put */
+ call = NULL;
+ continue;
+ }
+
+ /* determine whether to continue last data receive */
+ if (continue_call) {
+ _debug("maybe cont");
+ if (call != continue_call ||
+ skb->mark != RXRPC_SKB_MARK_DATA) {
+ release_sock(&rx->sk);
+ rxrpc_put_call(continue_call);
+ _leave(" = %d [noncont]", copied);
+ return copied;
+ }
+ }
+
+ rxrpc_get_call(call);
+
+ /* copy the peer address and timestamp */
+ if (!continue_call) {
+ /* the address goes into the buffer that msg_name
+ * points to, not over the pointer itself */
+ if (msg->msg_name && msg->msg_namelen > 0)
+ memcpy(msg->msg_name, &call->conn->trans->peer->srx,
+ sizeof(call->conn->trans->peer->srx));
+ sock_recv_timestamp(msg, &rx->sk, skb);
+ }
+
+ /* receive the message */
+ if (skb->mark != RXRPC_SKB_MARK_DATA)
+ goto receive_non_data_message;
+
+ _debug("recvmsg DATA #%u { %d, %d }",
+ ntohl(sp->hdr.seq), skb->len, sp->offset);
+
+ if (!continue_call) {
+ /* only set the control data once per recvmsg() */
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ ullen, &call->user_call_ID);
+ if (ret < 0)
+ goto copy_error;
+ ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+ }
+
+ ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv);
+ ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1);
+ call->rx_data_recv = ntohl(sp->hdr.seq);
+
+ ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten);
+
+ /* copy out as much of the rest of the packet as will fit */
+ offset = sp->offset;
+ copy = skb->len - offset;
+ if (copy > len - copied)
+ copy = len - copied;
+
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ ret = skb_copy_datagram_iovec(skb, offset,
+ msg->msg_iov, copy);
+ } else {
+ ret = skb_copy_and_csum_datagram_iovec(skb, offset,
+ msg->msg_iov);
+ if (ret == -EINVAL)
+ goto csum_copy_error;
+ }
+
+ if (ret < 0)
+ goto copy_error;
+
+ /* handle piecemeal consumption of data packets */
+ _debug("copied %d+%d", copy, copied);
+
+ offset += copy;
+ copied += copy;
+
+ if (!(flags & MSG_PEEK))
+ sp->offset = offset;
+
+ if (sp->offset < skb->len) {
+ _debug("buffer full");
+ ASSERTCMP(copied, ==, len);
+ break;
+ }
+
+ /* we transferred the whole data packet */
+ if (sp->hdr.flags & RXRPC_LAST_PACKET) {
+ _debug("last");
+ if (call->conn->out_clientflag) {
+ /* last byte of reply received */
+ ret = copied;
+ goto terminal_message;
+ }
+
+ /* last bit of request received */
+ if (!(flags & MSG_PEEK)) {
+ _debug("eat packet");
+ if (skb_dequeue(&rx->sk.sk_receive_queue) !=
+ skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ }
+ msg->msg_flags &= ~MSG_MORE;
+ break;
+ }
+
+ /* move on to the next data message */
+ _debug("next");
+ if (!continue_call)
+ continue_call = sp->call;
+ else
+ rxrpc_put_call(call);
+ call = NULL;
+
+ if (flags & MSG_PEEK) {
+ _debug("peek next");
+ skb = skb->next;
+ if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue)
+ break;
+ goto peek_next_packet;
+ }
+
+ _debug("eat packet");
+ if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ }
+
+ /* end of non-terminal data packet reception for the moment */
+ _debug("end rcv data");
+out:
+ release_sock(&rx->sk);
+ if (call)
+ rxrpc_put_call(call);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ _leave(" = %d [data]", copied);
+ return copied;
+
+ /* handle non-DATA messages such as aborts, incoming connections and
+ * final ACKs */
+receive_non_data_message:
+ _debug("non-data");
+
+ if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) {
+ _debug("RECV NEW CALL");
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code);
+ if (ret < 0)
+ goto copy_error;
+ if (!(flags & MSG_PEEK)) {
+ if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ }
+ goto out;
+ }
+
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ ullen, &call->user_call_ID);
+ if (ret < 0)
+ goto copy_error;
+ ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_DATA:
+ BUG();
+ case RXRPC_SKB_MARK_FINAL_ACK:
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code);
+ break;
+ case RXRPC_SKB_MARK_BUSY:
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code);
+ break;
+ case RXRPC_SKB_MARK_REMOTE_ABORT:
+ abort_code = call->abort_code;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
+ break;
+ case RXRPC_SKB_MARK_NET_ERROR:
+ _debug("RECV NET ERROR %d", sp->error);
+ abort_code = sp->error;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code);
+ break;
+ case RXRPC_SKB_MARK_LOCAL_ERROR:
+ _debug("RECV LOCAL ERROR %d", sp->error);
+ abort_code = sp->error;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4,
+ &abort_code);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ if (ret < 0)
+ goto copy_error;
+
+terminal_message:
+ _debug("terminal");
+ msg->msg_flags &= ~MSG_MORE;
+ msg->msg_flags |= MSG_EOR;
+
+ if (!(flags & MSG_PEEK)) {
+ _net("free terminal skb %p", skb);
+ if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ rxrpc_remove_user_ID(rx, call);
+ }
+
+ release_sock(&rx->sk);
+ rxrpc_put_call(call);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ _leave(" = %d", ret);
+ return ret;
+
+copy_error:
+ _debug("copy error");
+ release_sock(&rx->sk);
+ rxrpc_put_call(call);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ _leave(" = %d", ret);
+ return ret;
+
+csum_copy_error:
+ _debug("csum error");
+ release_sock(&rx->sk);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ rxrpc_kill_skb(skb);
+ skb_kill_datagram(&rx->sk, skb, flags);
+ rxrpc_put_call(call);
+ return -EAGAIN;
+
+wait_interrupted:
+ ret = sock_intr_errno(timeo);
+wait_error:
+ finish_wait(rx->sk.sk_sleep, &wait);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ /* only report the error if we haven't handed any data over; data
+ * that has already been copied out must still be accounted for */
+ if (copied == 0)
+ copied = ret;
+ _leave(" = %d [waitfail %d]", copied, ret);
+ return copied;
+
+}
+
+/**
+ * rxrpc_kernel_data_delivered - Record delivery of data message
+ * @skb: Message holding data
+ *
+ * Record the delivery of a data message. This permits RxRPC to keep its
+ * tracking correct. The socket buffer will be deleted.
+ *
+ * The message's sequence number must either match or directly follow the last
+ * one recorded as received on the call, and must be beyond the last one
+ * recorded as eaten; these conditions are asserted.
+ */
+void rxrpc_kernel_data_delivered(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_call *call = sp->call;
+
+ ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv);
+ ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1);
+ call->rx_data_recv = ntohl(sp->hdr.seq);
+
+ ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten);
+ rxrpc_free_skb(skb);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
+
+/**
+ * rxrpc_kernel_is_data_last - Determine if data message is last one
+ * @skb: Message holding data
+ *
+ * Determine if data message is last one for the parent call. The message
+ * must be marked as a data message; this is asserted. The result is true if
+ * the packet header has the last-packet flag set.
+ */
+bool rxrpc_kernel_is_data_last(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA);
+
+ return sp->hdr.flags & RXRPC_LAST_PACKET;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_is_data_last);
+
+/**
+ * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message
+ * @skb: Message indicating an abort
+ *
+ * Get the abort code from an RxRPC abort message. The message must be marked
+ * as a remote abort; this is asserted. The code is read from the call to
+ * which the message belongs.
+ */
+u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_REMOTE_ABORT);
+
+ return sp->call->abort_code;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_get_abort_code);
+
+/**
+ * rxrpc_kernel_get_error_number - Get the error number from an RxRPC error message
+ * @skb: Message indicating an error
+ *
+ * Get the error number from an RxRPC error message. The type of the message
+ * is not checked here.
+ */
+int rxrpc_kernel_get_error_number(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ return sp->error;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_get_error_number);
diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c
new file mode 100644
index 000000000000..60d1d364430a
--- /dev/null
+++ b/net/rxrpc/ar-security.c
@@ -0,0 +1,258 @@
+/* RxRPC security handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_security_methods); /* the registered security handlers */
+static DECLARE_RWSEM(rxrpc_security_sem); /* taken around accesses to rxrpc_security_methods */
+
+/*
+ * pin the module that provides an RxRPC security handler
+ * - returns the handler if a ref on its owner module was obtained, NULL if
+ *   not
+ */
+static struct rxrpc_security *rxrpc_security_get(struct rxrpc_security *sec)
+{
+ if (!try_module_get(sec->owner))
+ return NULL;
+
+ return sec;
+}
+
+/*
+ * release an RxRPC security module
+ * - drops the ref on the owner module taken by rxrpc_security_get()
+ */
+static void rxrpc_security_put(struct rxrpc_security *sec)
+{
+ module_put(sec->owner);
+}
+
+/*
+ * look up an rxrpc security module
+ * - the registered handlers are searched for one with the given security
+ *   index and a ref is taken on the module that owns it
+ * - returns the handler, or NULL if there's no such handler or the module ref
+ *   couldn't be obtained
+ */
+struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
+{
+ struct rxrpc_security *sec = NULL;
+
+ _enter("");
+
+ down_read(&rxrpc_security_sem);
+
+ list_for_each_entry(sec, &rxrpc_security_methods, link) {
+ if (sec->security_index == security_index) {
+ if (unlikely(!rxrpc_security_get(sec)))
+ break; /* fall through to the NULL return */
+ goto out;
+ }
+ }
+
+ sec = NULL; /* the cursor isn't a usable handler at this point */
+out:
+ up_read(&rxrpc_security_sem);
+ _leave(" = %p [%s]", sec, sec ? sec->name : "");
+ return sec;
+}
+
+/**
+ * rxrpc_register_security - register an RxRPC security handler
+ * @sec: security module
+ *
+ * register an RxRPC security handler for use by RxRPC
+ *
+ * Returns 0 if the handler was added to the list, or -EEXIST if a handler
+ * with the same security index is already registered.
+ */
+int rxrpc_register_security(struct rxrpc_security *sec)
+{
+ struct rxrpc_security *psec;
+ int ret;
+
+ _enter("");
+ down_write(&rxrpc_security_sem);
+
+ ret = -EEXIST;
+ list_for_each_entry(psec, &rxrpc_security_methods, link) {
+ if (psec->security_index == sec->security_index)
+ goto out;
+ }
+
+ list_add(&sec->link, &rxrpc_security_methods);
+
+ printk(KERN_NOTICE "RxRPC: Registered security type %d '%s'\n",
+ sec->security_index, sec->name);
+ ret = 0;
+
+out:
+ up_write(&rxrpc_security_sem);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+EXPORT_SYMBOL_GPL(rxrpc_register_security);
+
+/**
+ * rxrpc_unregister_security - unregister an RxRPC security handler
+ * @sec: security module
+ *
+ * unregister an RxRPC security handler
+ *
+ * The handler is unlinked from the list of registered handlers under the
+ * write lock and its list link is reinitialised.
+ */
+void rxrpc_unregister_security(struct rxrpc_security *sec)
+{
+
+ _enter("");
+ down_write(&rxrpc_security_sem);
+ list_del_init(&sec->link);
+ up_write(&rxrpc_security_sem);
+
+ printk(KERN_NOTICE "RxRPC: Unregistered security type %d '%s'\n",
+ sec->security_index, sec->name);
+}
+
+EXPORT_SYMBOL_GPL(rxrpc_unregister_security);
+
+/*
+ * initialise the security on a client connection
+ * - a connection without a key is left unsecured and 0 is returned
+ * - otherwise the key is validated, the security handler is looked up using
+ *   the index stored in the key's type data and the handler is asked to
+ *   initialise the connection
+ * - returns 0 on success, the key validation error, -EKEYREJECTED if there's
+ *   no handler for the key's security index, or the error from the handler
+ *   (in which case the handler is released again)
+ */
+int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
+{
+ struct rxrpc_security *sec;
+ struct key *key = conn->key;
+ int ret;
+
+ _enter("{%d},{%x}", conn->debug_id, key_serial(key));
+
+ if (!key)
+ return 0;
+
+ ret = key_validate(key);
+ if (ret < 0)
+ return ret;
+
+ sec = rxrpc_security_lookup(key->type_data.x[0]);
+ if (!sec)
+ return -EKEYREJECTED;
+ conn->security = sec;
+
+ ret = conn->security->init_connection_security(conn);
+ if (ret < 0) {
+ rxrpc_security_put(conn->security);
+ conn->security = NULL;
+ return ret;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * initialise the security on a server connection
+ * - the security handler is looked up from the connection's security index
+ * - the service socket with the connection's service ID is located on the
+ *   local endpoint and its keyring of securities is searched for a key
+ *   described by "<service ID>:<security index>"
+ * - on success the key and the handler are attached to the connection and 0
+ *   is returned
+ * - returns -ENOKEY if there's no handler or the service has no keyring,
+ *   -ENOENT if the service can't be found, or the error from the keyring
+ *   search; the handler is released again in the latter three cases
+ */
+int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
+{
+ struct rxrpc_security *sec;
+ struct rxrpc_local *local = conn->trans->local;
+ struct rxrpc_sock *rx;
+ struct key *key;
+ key_ref_t kref;
+ char kdesc[5+1+3+1]; /* sized as 5 digits, ':', 3 digits and a NUL */
+
+ _enter("");
+
+ sprintf(kdesc, "%u:%u", ntohs(conn->service_id), conn->security_ix);
+
+ sec = rxrpc_security_lookup(conn->security_ix);
+ if (!sec) {
+ _leave(" = -ENOKEY [lookup]");
+ return -ENOKEY;
+ }
+
+ /* find the service */
+ read_lock_bh(&local->services_lock);
+ list_for_each_entry(rx, &local->services, listen_link) {
+ if (rx->service_id == conn->service_id)
+ goto found_service;
+ }
+
+ /* the service appears to have died */
+ read_unlock_bh(&local->services_lock);
+ rxrpc_security_put(sec);
+ _leave(" = -ENOENT");
+ return -ENOENT;
+
+found_service:
+ if (!rx->securities) {
+ read_unlock_bh(&local->services_lock);
+ rxrpc_security_put(sec);
+ _leave(" = -ENOKEY");
+ return -ENOKEY;
+ }
+
+ /* look through the service's keyring */
+ kref = keyring_search(make_key_ref(rx->securities, 1UL),
+ &key_type_rxrpc_s, kdesc);
+ if (IS_ERR(kref)) {
+ read_unlock_bh(&local->services_lock);
+ rxrpc_security_put(sec);
+ _leave(" = %ld [search]", PTR_ERR(kref));
+ return PTR_ERR(kref);
+ }
+
+ key = key_ref_to_ptr(kref);
+ read_unlock_bh(&local->services_lock);
+
+ conn->server_key = key;
+ conn->security = sec;
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * apply the connection's security to a packet that is about to be sent
+ * - if the call's connection has no security handler, the packet is left
+ *   untouched and 0 is returned
+ * - otherwise the handler's verdict is passed back to the caller
+ */
+int rxrpc_secure_packet(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ size_t data_size,
+ void *sechdr)
+{
+ if (!call->conn->security)
+ return 0;
+
+ return call->conn->security->secure_packet(call, skb, data_size,
+ sechdr);
+}
+
+/*
+ * check a packet against the connection's security
+ * - if the call's connection has no security handler, the packet is accepted
+ *   as is and 0 is returned
+ * - otherwise the handler's verdict is passed back to the caller; the handler
+ *   is given @_abort_code to fill in
+ */
+int rxrpc_verify_packet(const struct rxrpc_call *call, struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ if (!call->conn->security)
+ return 0;
+
+ return call->conn->security->verify_packet(call, skb, _abort_code);
+}
+
+/*
+ * clear connection security
+ * - if a security handler is attached, it's asked to clear its state on the
+ *   connection and is then released and detached
+ * - the refs on the connection's key and server key are dropped whether or
+ *   not a handler was attached
+ */
+void rxrpc_clear_conn_security(struct rxrpc_connection *conn)
+{
+ _enter("{%d}", conn->debug_id);
+
+ if (conn->security) {
+ conn->security->clear(conn);
+ rxrpc_security_put(conn->security);
+ conn->security = NULL;
+ }
+
+ key_put(conn->key);
+ key_put(conn->server_key);
+}
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
new file mode 100644
index 000000000000..de755e04d29c
--- /dev/null
+++ b/net/rxrpc/ar-skbuff.c
@@ -0,0 +1,132 @@
+/* ar-skbuff.c: socket buffer destruction handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * set up for the ACK at the end of the receive phase when we discard the final
+ * receive phase data packet; a call in any other state is left untouched
+ * - called with softirqs disabled
+ */
+static void rxrpc_request_final_ACK(struct rxrpc_call *call)
+{
+ /* the call may be aborted before we have a chance to ACK it */
+ write_lock(&call->state_lock);
+
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ call->state = RXRPC_CALL_CLIENT_FINAL_ACK;
+ _debug("request final ACK");
+
+ /* get an extra ref on the call for the final-ACK generator to
+ * release */
+ rxrpc_get_call(call);
+ set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+ if (try_to_del_timer_sync(&call->ack_timer) >= 0) /* < 0: handler is running */
+ rxrpc_queue_call(call);
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ call->state = RXRPC_CALL_SERVER_ACK_REQUEST; /* fall through */
+ default:
+ break;
+ }
+
+ write_unlock(&call->state_lock);
+}
+
+/*
+ * drop the bottom ACK off of the call ACK window and advance the window
+ */
+static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
+ struct rxrpc_skb_priv *sp)
+{
+ int loop;
+ u32 seq;
+
+ spin_lock_bh(&call->lock);
+
+ _debug("hard ACK #%u", ntohl(sp->hdr.seq));
+
+ for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { /* NOTE(review): last pass reads [ASZ] - confirm the array has ASZ + 1 words */
+ call->ackr_window[loop] >>= 1; /* discard this word's bottom bit */
+ call->ackr_window[loop] |=
+ call->ackr_window[loop + 1] << (BITS_PER_LONG - 1); /* carry in bit 0 of the next word */
+ }
+
+ seq = ntohl(sp->hdr.seq);
+ ASSERTCMP(seq, ==, call->rx_data_eaten + 1); /* packets must be eaten strictly in sequence */
+ call->rx_data_eaten = seq;
+
+ if (call->ackr_win_top < UINT_MAX) /* saturate rather than wrap */
+ call->ackr_win_top++;
+
+ ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
+ call->rx_data_post, >=, call->rx_data_recv);
+ ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
+ call->rx_data_recv, >=, call->rx_data_eaten);
+
+ if (sp->hdr.flags & RXRPC_LAST_PACKET) {
+ rxrpc_request_final_ACK(call); /* that was the last packet of the receive phase */
+ } else if (atomic_dec_and_test(&call->ackr_not_idle) &&
+ test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
+ _debug("send Rx idle ACK");
+ __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
+ true);
+ }
+
+ spin_unlock_bh(&call->lock);
+}
+
+/*
+ * destroy a packet that has an RxRPC control buffer
+ * - advance the hard-ACK state of the parent call (done here in case something
+ * in the kernel bypasses recvmsg() and steals the packet directly off of the
+ * socket receive queue)
+ */
+void rxrpc_packet_destructor(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_call *call = sp->call;
+
+	_enter("%p{%p}", skb, call);
+
+	if (!call)
+		goto no_call;
+	/* discarding a data packet advances the call's hard-ACK state */
+	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
+		rxrpc_hard_ACK_data(call, sp);
+	rxrpc_put_call(call);
+	sp->call = NULL;
+no_call:
+	if (skb->sk)
+		sock_rfree(skb);
+	_leave("");
+}
+
+/**
+ * rxrpc_kernel_free_skb - Free an RxRPC socket buffer
+ * @skb: The socket buffer to be freed
+ *
+ * Hand the buffer back to RxRPC to free via rxrpc_free_skb(), permitting it to
+ * maintain debug accounting.  Exported for use by other kernel modules.
+ */
+void rxrpc_kernel_free_skb(struct sk_buff *skb)
+{
+ rxrpc_free_skb(skb);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_free_skb);
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
new file mode 100644
index 000000000000..d43d78f19302
--- /dev/null
+++ b/net/rxrpc/ar-transport.c
@@ -0,0 +1,276 @@
+/* RxRPC point-to-point transport session management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static void rxrpc_transport_reaper(struct work_struct *work);
+
+static LIST_HEAD(rxrpc_transports); /* every transport record, in use or awaiting reaping */
+static DEFINE_RWLOCK(rxrpc_transport_lock); /* guards rxrpc_transports */
+static unsigned long rxrpc_transport_timeout = 3600 * 24; /* seconds an unused transport lingers before it is reaped */
+static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper); /* work item that runs the reaper */
+
+/*
+ * allocate a new transport session manager
+ */
+static struct rxrpc_transport *rxrpc_alloc_transport(struct rxrpc_local *local,
+						     struct rxrpc_peer *peer,
+						     gfp_t gfp)
+{
+	struct rxrpc_transport *trans;
+
+	_enter("");
+
+	trans = kzalloc(sizeof(struct rxrpc_transport), gfp);
+	if (!trans)
+		goto out;
+
+	/* fill in the zeroed record: it starts off unlisted with a usage
+	 * count of 1, and merely points at the two endpoints - no refs are
+	 * taken on them here */
+	trans->local = local;
+	trans->peer = peer;
+	INIT_LIST_HEAD(&trans->link);
+	trans->bundles = RB_ROOT;
+	trans->client_conns = RB_ROOT;
+	trans->server_conns = RB_ROOT;
+	skb_queue_head_init(&trans->error_queue);
+	spin_lock_init(&trans->client_lock);
+	rwlock_init(&trans->conn_lock);
+	atomic_set(&trans->usage, 1);
+	trans->debug_id = atomic_inc_return(&rxrpc_debug_id);
+
+	/* select the error handler to suit the transport; anything other
+	 * than datagrams over AF_INET is a bug */
+	if (peer->srx.transport.family == AF_INET &&
+	    peer->srx.transport_type == SOCK_DGRAM)
+		INIT_WORK(&trans->error_handler,
+			  rxrpc_UDP_error_handler);
+	else
+		BUG();
+
+out:
+	_leave(" = %p", trans);
+	return trans;
+}
+
+/*
+ * obtain a transport session for the nominated endpoints, creating one if need
+ * be; returns the transport with its usage count raised, or ERR_PTR(-ENOMEM)
+ */
+struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *local,
+					    struct rxrpc_peer *peer,
+					    gfp_t gfp)
+{
+	struct rxrpc_transport *trans, *candidate;
+	const char *new = "old";	/* only used for the trace line below */
+
+	_enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
+	       NIPQUAD(local->srx.transport.sin.sin_addr),
+	       ntohs(local->srx.transport.sin.sin_port),
+	       NIPQUAD(peer->srx.transport.sin.sin_addr),
+	       ntohs(peer->srx.transport.sin.sin_port));
+
+	/* search the transport list first */
+	read_lock_bh(&rxrpc_transport_lock);
+	list_for_each_entry(trans, &rxrpc_transports, link) {
+		if (trans->local == local && trans->peer == peer)
+			goto found_extant_transport;
+	}
+	read_unlock_bh(&rxrpc_transport_lock);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_transport(local, peer, gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	write_lock_bh(&rxrpc_transport_lock);	/* the list was unlocked while we allocated */
+
+	list_for_each_entry(trans, &rxrpc_transports, link) {
+		if (trans->local == local && trans->peer == peer)
+			goto found_extant_second;
+	}
+
+	/* we can now add the new candidate to the list */
+	trans = candidate;
+	candidate = NULL;
+
+	rxrpc_get_local(trans->local);	/* a listed transport pins both endpoints */
+	atomic_inc(&trans->peer->usage);
+	list_add_tail(&trans->link, &rxrpc_transports);
+	write_unlock_bh(&rxrpc_transport_lock);
+	new = "new";
+
+success:
+	_net("TRANSPORT %s %d local %d -> peer %d",
+	     new,
+	     trans->debug_id,
+	     trans->local->debug_id,
+	     trans->peer->debug_id);
+
+	_leave(" = %p {u=%d}", trans, atomic_read(&trans->usage));
+	return trans;
+
+	/* we found the transport in the list immediately */
+found_extant_transport:
+	atomic_inc(&trans->usage);
+	read_unlock_bh(&rxrpc_transport_lock);
+	goto success;
+
+	/* we found the transport on the second time through the list */
+found_extant_second:
+	atomic_inc(&trans->usage);
+	write_unlock_bh(&rxrpc_transport_lock);
+	kfree(candidate);	/* never listed and holds no refs, so just free it */
+	goto success;
+}
+
+/*
+ * find the transport connecting two endpoints
+ */
+struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *local,
+					     struct rxrpc_peer *peer)
+{
+	struct rxrpc_transport *trans;
+
+	_enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
+	       NIPQUAD(local->srx.transport.sin.sin_addr),
+	       ntohs(local->srx.transport.sin.sin_port),
+	       NIPQUAD(peer->srx.transport.sin.sin_addr),
+	       ntohs(peer->srx.transport.sin.sin_port));
+
+	/* walk the transport list looking for the record that joins exactly
+	 * these two endpoints */
+	read_lock_bh(&rxrpc_transport_lock);
+	list_for_each_entry(trans, &rxrpc_transports, link) {
+		if (trans->local != local || trans->peer != peer)
+			continue;
+
+		/* raise the usage count before letting go of the lock */
+		atomic_inc(&trans->usage);
+		read_unlock_bh(&rxrpc_transport_lock);
+		_leave(" = %p", trans);
+		return trans;
+	}
+
+	read_unlock_bh(&rxrpc_transport_lock);
+	_leave(" = NULL");
+	return NULL;
+}
+
+/*
+ * release a transport session
+ */
+void rxrpc_put_transport(struct rxrpc_transport *trans)
+{
+	_enter("%p{u=%d}", trans, atomic_read(&trans->usage));
+
+	ASSERTCMP(atomic_read(&trans->usage), >, 0);
+
+	trans->put_time = xtime.tv_sec;	/* the reaper times expiry from here */
+	if (unlikely(atomic_dec_and_test(&trans->usage))) {
+		_debug("zombie");
+		/* last ref gone: kick the reaper and let it work out the
+		 * timeout, so we can't overextend it if it's already running */
+		rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
+	}
+	_leave("");
+}
+
+/*
+ * clean up a transport session
+ */
+static void rxrpc_cleanup_transport(struct rxrpc_transport *trans)
+{
+ _net("DESTROY TRANS %d", trans->debug_id);
+
+ rxrpc_purge_queue(&trans->error_queue); /* discard anything still on the error queue */
+
+ rxrpc_put_local(trans->local); /* drop the endpoint refs taken when the transport was listed */
+ rxrpc_put_peer(trans->peer);
+ kfree(trans);
+}
+
+/*
+ * reap dead (usage == 0) transports that have passed their expiry date
+ */
+static void rxrpc_transport_reaper(struct work_struct *work)
+{
+ struct rxrpc_transport *trans, *_p;
+ unsigned long now, earliest, reap_time;
+
+ LIST_HEAD(graveyard);
+
+ _enter("");
+
+ now = xtime.tv_sec; /* seconds, the same unit as put_time */
+ earliest = ULONG_MAX; /* i.e. nothing found waiting to expire yet */
+
+ /* extract all the transports that have been dead too long */
+ write_lock_bh(&rxrpc_transport_lock);
+ list_for_each_entry_safe(trans, _p, &rxrpc_transports, link) {
+ _debug("reap TRANS %d { u=%d t=%ld }",
+ trans->debug_id, atomic_read(&trans->usage),
+ (long) now - (long) trans->put_time);
+
+ if (likely(atomic_read(&trans->usage) > 0))
+ continue; /* still in use */
+
+ reap_time = trans->put_time + rxrpc_transport_timeout;
+ if (reap_time <= now)
+ list_move_tail(&trans->link, &graveyard);
+ else if (reap_time < earliest)
+ earliest = reap_time; /* soonest expiry still pending */
+ }
+ write_unlock_bh(&rxrpc_transport_lock);
+
+ if (earliest != ULONG_MAX) { /* something is dead but not yet expired */
+ _debug("reschedule reaper %ld", (long) earliest - now);
+ ASSERTCMP(earliest, >, now);
+ rxrpc_queue_delayed_work(&rxrpc_transport_reap,
+ (earliest - now) * HZ); /* seconds -> jiffies */
+ }
+
+ /* then destroy all those pulled out, now that the list lock is dropped */
+ while (!list_empty(&graveyard)) {
+ trans = list_entry(graveyard.next, struct rxrpc_transport,
+ link);
+ list_del_init(&trans->link);
+
+ ASSERTCMP(atomic_read(&trans->usage), ==, 0);
+ rxrpc_cleanup_transport(trans);
+ }
+
+ _leave("");
+}
+
+/*
+ * preemptively destroy all the transport session records rather than waiting
+ * for them to time out (this only queues the reaper; it is not waited for here)
+ */
+void __exit rxrpc_destroy_all_transports(void)
+{
+ _enter("");
+
+ rxrpc_transport_timeout = 0; /* every unused transport now counts as expired */
+ cancel_delayed_work(&rxrpc_transport_reap); /* drop any run scheduled for later... */
+ rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0); /* ...and queue one with no delay */
+
+ _leave("");
+}
diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c
deleted file mode 100644
index d07122b57e0d..000000000000
--- a/net/rxrpc/call.c
+++ /dev/null
@@ -1,2277 +0,0 @@
-/* call.c: Rx call routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_call_count);
-__RXACCT_DECL(atomic_t rxrpc_message_count);
-
-LIST_HEAD(rxrpc_calls);
-DECLARE_RWSEM(rxrpc_calls_sem);
-
-unsigned rxrpc_call_rcv_timeout = HZ/3;
-static unsigned rxrpc_call_acks_timeout = HZ/3;
-static unsigned rxrpc_call_dfr_ack_timeout = HZ/20;
-static unsigned short rxrpc_call_max_resend = HZ/10;
-
-const char *rxrpc_call_states[] = {
- "COMPLETE",
- "ERROR",
- "SRVR_RCV_OPID",
- "SRVR_RCV_ARGS",
- "SRVR_GOT_ARGS",
- "SRVR_SND_REPLY",
- "SRVR_RCV_FINAL_ACK",
- "CLNT_SND_ARGS",
- "CLNT_RCV_REPLY",
- "CLNT_GOT_REPLY"
-};
-
-const char *rxrpc_call_error_states[] = {
- "NO_ERROR",
- "LOCAL_ABORT",
- "PEER_ABORT",
- "LOCAL_ERROR",
- "REMOTE_ERROR"
-};
-
-const char *rxrpc_pkts[] = {
- "?00",
- "data", "ack", "busy", "abort", "ackall", "chall", "resp", "debug",
- "?09", "?10", "?11", "?12", "?13", "?14", "?15"
-};
-
-static const char *rxrpc_acks[] = {
- "---", "REQ", "DUP", "SEQ", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
- "-?-"
-};
-
-static const char _acktype[] = "NA-";
-
-static void rxrpc_call_receive_packet(struct rxrpc_call *call);
-static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
- struct rxrpc_message *msg);
-static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
- struct rxrpc_message *msg);
-static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
- rxrpc_seq_t higest);
-static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest);
-static int __rxrpc_call_read_data(struct rxrpc_call *call);
-
-static int rxrpc_call_record_ACK(struct rxrpc_call *call,
- struct rxrpc_message *msg,
- rxrpc_seq_t seq,
- size_t count);
-
-static int rxrpc_call_flush(struct rxrpc_call *call);
-
-#define _state(call) \
- _debug("[[[ state %s ]]]", rxrpc_call_states[call->app_call_state]);
-
-static void rxrpc_call_default_attn_func(struct rxrpc_call *call)
-{
- wake_up(&call->waitq);
-}
-
-static void rxrpc_call_default_error_func(struct rxrpc_call *call)
-{
- wake_up(&call->waitq);
-}
-
-static void rxrpc_call_default_aemap_func(struct rxrpc_call *call)
-{
- switch (call->app_err_state) {
- case RXRPC_ESTATE_LOCAL_ABORT:
- call->app_abort_code = -call->app_errno;
- case RXRPC_ESTATE_PEER_ABORT:
- call->app_errno = -ECONNABORTED;
- default:
- break;
- }
-}
-
-static void __rxrpc_call_acks_timeout(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
- _debug("ACKS TIMEOUT %05lu", jiffies - call->cjif);
-
- call->flags |= RXRPC_CALL_ACKS_TIMO;
- rxrpc_krxiod_queue_call(call);
-}
-
-static void __rxrpc_call_rcv_timeout(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
- _debug("RCV TIMEOUT %05lu", jiffies - call->cjif);
-
- call->flags |= RXRPC_CALL_RCV_TIMO;
- rxrpc_krxiod_queue_call(call);
-}
-
-static void __rxrpc_call_ackr_timeout(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
- _debug("ACKR TIMEOUT %05lu",jiffies - call->cjif);
-
- call->flags |= RXRPC_CALL_ACKR_TIMO;
- rxrpc_krxiod_queue_call(call);
-}
-
-/*****************************************************************************/
-/*
- * calculate a timeout based on an RTT value
- */
-static inline unsigned long __rxrpc_rtt_based_timeout(struct rxrpc_call *call,
- unsigned long val)
-{
- unsigned long expiry = call->conn->peer->rtt / (1000000 / HZ);
-
- expiry += 10;
- if (expiry < HZ / 25)
- expiry = HZ / 25;
- if (expiry > HZ)
- expiry = HZ;
-
- _leave(" = %lu jiffies", expiry);
- return jiffies + expiry;
-} /* end __rxrpc_rtt_based_timeout() */
-
-/*****************************************************************************/
-/*
- * create a new call record
- */
-static inline int __rxrpc_create_call(struct rxrpc_connection *conn,
- struct rxrpc_call **_call)
-{
- struct rxrpc_call *call;
-
- _enter("%p", conn);
-
- /* allocate and initialise a call record */
- call = (struct rxrpc_call *) get_zeroed_page(GFP_KERNEL);
- if (!call) {
- _leave(" ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&call->usage, 1);
-
- init_waitqueue_head(&call->waitq);
- spin_lock_init(&call->lock);
- INIT_LIST_HEAD(&call->link);
- INIT_LIST_HEAD(&call->acks_pendq);
- INIT_LIST_HEAD(&call->rcv_receiveq);
- INIT_LIST_HEAD(&call->rcv_krxiodq_lk);
- INIT_LIST_HEAD(&call->app_readyq);
- INIT_LIST_HEAD(&call->app_unreadyq);
- INIT_LIST_HEAD(&call->app_link);
- INIT_LIST_HEAD(&call->app_attn_link);
-
- init_timer(&call->acks_timeout);
- call->acks_timeout.data = (unsigned long) call;
- call->acks_timeout.function = __rxrpc_call_acks_timeout;
-
- init_timer(&call->rcv_timeout);
- call->rcv_timeout.data = (unsigned long) call;
- call->rcv_timeout.function = __rxrpc_call_rcv_timeout;
-
- init_timer(&call->ackr_dfr_timo);
- call->ackr_dfr_timo.data = (unsigned long) call;
- call->ackr_dfr_timo.function = __rxrpc_call_ackr_timeout;
-
- call->conn = conn;
- call->ackr_win_bot = 1;
- call->ackr_win_top = call->ackr_win_bot + RXRPC_CALL_ACK_WINDOW_SIZE - 1;
- call->ackr_prev_seq = 0;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_attn_func = rxrpc_call_default_attn_func;
- call->app_error_func = rxrpc_call_default_error_func;
- call->app_aemap_func = rxrpc_call_default_aemap_func;
- call->app_scr_alloc = call->app_scratch;
-
- call->cjif = jiffies;
-
- _leave(" = 0 (%p)", call);
-
- *_call = call;
-
- return 0;
-} /* end __rxrpc_create_call() */
-
-/*****************************************************************************/
-/*
- * create a new call record for outgoing calls
- */
-int rxrpc_create_call(struct rxrpc_connection *conn,
- rxrpc_call_attn_func_t attn,
- rxrpc_call_error_func_t error,
- rxrpc_call_aemap_func_t aemap,
- struct rxrpc_call **_call)
-{
- DECLARE_WAITQUEUE(myself, current);
-
- struct rxrpc_call *call;
- int ret, cix, loop;
-
- _enter("%p", conn);
-
- /* allocate and initialise a call record */
- ret = __rxrpc_create_call(conn, &call);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- call->app_call_state = RXRPC_CSTATE_CLNT_SND_ARGS;
- if (attn)
- call->app_attn_func = attn;
- if (error)
- call->app_error_func = error;
- if (aemap)
- call->app_aemap_func = aemap;
-
- _state(call);
-
- spin_lock(&conn->lock);
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&conn->chanwait, &myself);
-
- try_again:
- /* try to find an unused channel */
- for (cix = 0; cix < 4; cix++)
- if (!conn->channels[cix])
- goto obtained_chan;
-
- /* no free channels - wait for one to become available */
- ret = -EINTR;
- if (signal_pending(current))
- goto error_unwait;
-
- spin_unlock(&conn->lock);
-
- schedule();
- set_current_state(TASK_INTERRUPTIBLE);
-
- spin_lock(&conn->lock);
- goto try_again;
-
- /* got a channel - now attach to the connection */
- obtained_chan:
- remove_wait_queue(&conn->chanwait, &myself);
- set_current_state(TASK_RUNNING);
-
- /* concoct a unique call number */
- next_callid:
- call->call_id = htonl(++conn->call_counter);
- for (loop = 0; loop < 4; loop++)
- if (conn->channels[loop] &&
- conn->channels[loop]->call_id == call->call_id)
- goto next_callid;
-
- rxrpc_get_connection(conn);
- conn->channels[cix] = call; /* assign _after_ done callid check loop */
- do_gettimeofday(&conn->atime);
- call->chan_ix = htonl(cix);
-
- spin_unlock(&conn->lock);
-
- down_write(&rxrpc_calls_sem);
- list_add_tail(&call->call_link, &rxrpc_calls);
- up_write(&rxrpc_calls_sem);
-
- __RXACCT(atomic_inc(&rxrpc_call_count));
- *_call = call;
-
- _leave(" = 0 (call=%p cix=%u)", call, cix);
- return 0;
-
- error_unwait:
- remove_wait_queue(&conn->chanwait, &myself);
- set_current_state(TASK_RUNNING);
- spin_unlock(&conn->lock);
-
- free_page((unsigned long) call);
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_create_call() */
-
-/*****************************************************************************/
-/*
- * create a new call record for incoming calls
- */
-int rxrpc_incoming_call(struct rxrpc_connection *conn,
- struct rxrpc_message *msg,
- struct rxrpc_call **_call)
-{
- struct rxrpc_call *call;
- unsigned cix;
- int ret;
-
- cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
-
- _enter("%p,%u,%u", conn, ntohl(msg->hdr.callNumber), cix);
-
- /* allocate and initialise a call record */
- ret = __rxrpc_create_call(conn, &call);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- call->pkt_rcv_count = 1;
- call->app_call_state = RXRPC_CSTATE_SRVR_RCV_OPID;
- call->app_mark = sizeof(uint32_t);
-
- _state(call);
-
- /* attach to the connection */
- ret = -EBUSY;
- call->chan_ix = htonl(cix);
- call->call_id = msg->hdr.callNumber;
-
- spin_lock(&conn->lock);
-
- if (!conn->channels[cix] ||
- conn->channels[cix]->app_call_state == RXRPC_CSTATE_COMPLETE ||
- conn->channels[cix]->app_call_state == RXRPC_CSTATE_ERROR
- ) {
- conn->channels[cix] = call;
- rxrpc_get_connection(conn);
- ret = 0;
- }
-
- spin_unlock(&conn->lock);
-
- if (ret < 0) {
- free_page((unsigned long) call);
- call = NULL;
- }
-
- if (ret == 0) {
- down_write(&rxrpc_calls_sem);
- list_add_tail(&call->call_link, &rxrpc_calls);
- up_write(&rxrpc_calls_sem);
- __RXACCT(atomic_inc(&rxrpc_call_count));
- *_call = call;
- }
-
- _leave(" = %d [%p]", ret, call);
- return ret;
-} /* end rxrpc_incoming_call() */
-
-/*****************************************************************************/
-/*
- * free a call record
- */
-void rxrpc_put_call(struct rxrpc_call *call)
-{
- struct rxrpc_connection *conn = call->conn;
- struct rxrpc_message *msg;
-
- _enter("%p{u=%d}",call,atomic_read(&call->usage));
-
- /* sanity check */
- if (atomic_read(&call->usage) <= 0)
- BUG();
-
- /* to prevent a race, the decrement and the de-list must be effectively
- * atomic */
- spin_lock(&conn->lock);
- if (likely(!atomic_dec_and_test(&call->usage))) {
- spin_unlock(&conn->lock);
- _leave("");
- return;
- }
-
- if (conn->channels[ntohl(call->chan_ix)] == call)
- conn->channels[ntohl(call->chan_ix)] = NULL;
-
- spin_unlock(&conn->lock);
-
- wake_up(&conn->chanwait);
-
- rxrpc_put_connection(conn);
-
- /* clear the timers and dequeue from krxiod */
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
-
- rxrpc_krxiod_dequeue_call(call);
-
- /* clean up the contents of the struct */
- if (call->snd_nextmsg)
- rxrpc_put_message(call->snd_nextmsg);
-
- if (call->snd_ping)
- rxrpc_put_message(call->snd_ping);
-
- while (!list_empty(&call->acks_pendq)) {
- msg = list_entry(call->acks_pendq.next,
- struct rxrpc_message, link);
- list_del(&msg->link);
- rxrpc_put_message(msg);
- }
-
- while (!list_empty(&call->rcv_receiveq)) {
- msg = list_entry(call->rcv_receiveq.next,
- struct rxrpc_message, link);
- list_del(&msg->link);
- rxrpc_put_message(msg);
- }
-
- while (!list_empty(&call->app_readyq)) {
- msg = list_entry(call->app_readyq.next,
- struct rxrpc_message, link);
- list_del(&msg->link);
- rxrpc_put_message(msg);
- }
-
- while (!list_empty(&call->app_unreadyq)) {
- msg = list_entry(call->app_unreadyq.next,
- struct rxrpc_message, link);
- list_del(&msg->link);
- rxrpc_put_message(msg);
- }
-
- module_put(call->owner);
-
- down_write(&rxrpc_calls_sem);
- list_del(&call->call_link);
- up_write(&rxrpc_calls_sem);
-
- __RXACCT(atomic_dec(&rxrpc_call_count));
- free_page((unsigned long) call);
-
- _leave(" [destroyed]");
-} /* end rxrpc_put_call() */
-
-/*****************************************************************************/
-/*
- * actually generate a normal ACK
- */
-static inline int __rxrpc_call_gen_normal_ACK(struct rxrpc_call *call,
- rxrpc_seq_t seq)
-{
- struct rxrpc_message *msg;
- struct kvec diov[3];
- __be32 aux[4];
- int delta, ret;
-
- /* ACKs default to DELAY */
- if (!call->ackr.reason)
- call->ackr.reason = RXRPC_ACK_DELAY;
-
- _proto("Rx %05lu Sending ACK { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
- jiffies - call->cjif,
- ntohs(call->ackr.maxSkew),
- ntohl(call->ackr.firstPacket),
- ntohl(call->ackr.previousPacket),
- ntohl(call->ackr.serial),
- rxrpc_acks[call->ackr.reason],
- call->ackr.nAcks);
-
- aux[0] = htonl(call->conn->peer->if_mtu); /* interface MTU */
- aux[1] = htonl(1444); /* max MTU */
- aux[2] = htonl(16); /* rwind */
- aux[3] = htonl(4); /* max packets */
-
- diov[0].iov_len = sizeof(struct rxrpc_ackpacket);
- diov[0].iov_base = &call->ackr;
- diov[1].iov_len = call->ackr_pend_cnt + 3;
- diov[1].iov_base = call->ackr_array;
- diov[2].iov_len = sizeof(aux);
- diov[2].iov_base = &aux;
-
- /* build and send the message */
- ret = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
- 3, diov, GFP_KERNEL, &msg);
- if (ret < 0)
- goto out;
-
- msg->seq = seq;
- msg->hdr.seq = htonl(seq);
- msg->hdr.flags |= RXRPC_SLOW_START_OK;
-
- ret = rxrpc_conn_sendmsg(call->conn, msg);
- rxrpc_put_message(msg);
- if (ret < 0)
- goto out;
- call->pkt_snd_count++;
-
- /* count how many actual ACKs there were at the front */
- for (delta = 0; delta < call->ackr_pend_cnt; delta++)
- if (call->ackr_array[delta] != RXRPC_ACK_TYPE_ACK)
- break;
-
- call->ackr_pend_cnt -= delta; /* all ACK'd to this point */
-
- /* crank the ACK window around */
- if (delta == 0) {
- /* un-ACK'd window */
- }
- else if (delta < RXRPC_CALL_ACK_WINDOW_SIZE) {
- /* partially ACK'd window
- * - shuffle down to avoid losing out-of-sequence packets
- */
- call->ackr_win_bot += delta;
- call->ackr_win_top += delta;
-
- memmove(&call->ackr_array[0],
- &call->ackr_array[delta],
- call->ackr_pend_cnt);
-
- memset(&call->ackr_array[call->ackr_pend_cnt],
- RXRPC_ACK_TYPE_NACK,
- sizeof(call->ackr_array) - call->ackr_pend_cnt);
- }
- else {
- /* fully ACK'd window
- * - just clear the whole thing
- */
- memset(&call->ackr_array,
- RXRPC_ACK_TYPE_NACK,
- sizeof(call->ackr_array));
- }
-
- /* clear this ACK */
- memset(&call->ackr, 0, sizeof(call->ackr));
-
- out:
- if (!call->app_call_state)
- printk("___ STATE 0 ___\n");
- return ret;
-} /* end __rxrpc_call_gen_normal_ACK() */
-
-/*****************************************************************************/
-/*
- * note the reception of a packet in the call's ACK records and generate an
- * appropriate ACK packet if necessary
- * - returns 0 if packet should be processed, 1 if packet should be ignored
- * and -ve on an error
- */
-static int rxrpc_call_generate_ACK(struct rxrpc_call *call,
- struct rxrpc_header *hdr,
- struct rxrpc_ackpacket *ack)
-{
- struct rxrpc_message *msg;
- rxrpc_seq_t seq;
- unsigned offset;
- int ret = 0, err;
- u8 special_ACK, do_ACK, force;
-
- _enter("%p,%p { seq=%d tp=%d fl=%02x }",
- call, hdr, ntohl(hdr->seq), hdr->type, hdr->flags);
-
- seq = ntohl(hdr->seq);
- offset = seq - call->ackr_win_bot;
- do_ACK = RXRPC_ACK_DELAY;
- special_ACK = 0;
- force = (seq == 1);
-
- if (call->ackr_high_seq < seq)
- call->ackr_high_seq = seq;
-
- /* deal with generation of obvious special ACKs first */
- if (ack && ack->reason == RXRPC_ACK_PING) {
- special_ACK = RXRPC_ACK_PING_RESPONSE;
- ret = 1;
- goto gen_ACK;
- }
-
- if (seq < call->ackr_win_bot) {
- special_ACK = RXRPC_ACK_DUPLICATE;
- ret = 1;
- goto gen_ACK;
- }
-
- if (seq >= call->ackr_win_top) {
- special_ACK = RXRPC_ACK_EXCEEDS_WINDOW;
- ret = 1;
- goto gen_ACK;
- }
-
- if (call->ackr_array[offset] != RXRPC_ACK_TYPE_NACK) {
- special_ACK = RXRPC_ACK_DUPLICATE;
- ret = 1;
- goto gen_ACK;
- }
-
- /* okay... it's a normal data packet inside the ACK window */
- call->ackr_array[offset] = RXRPC_ACK_TYPE_ACK;
-
- if (offset < call->ackr_pend_cnt) {
- }
- else if (offset > call->ackr_pend_cnt) {
- do_ACK = RXRPC_ACK_OUT_OF_SEQUENCE;
- call->ackr_pend_cnt = offset;
- goto gen_ACK;
- }
-
- if (hdr->flags & RXRPC_REQUEST_ACK) {
- do_ACK = RXRPC_ACK_REQUESTED;
- }
-
- /* generate an ACK on the final packet of a reply just received */
- if (hdr->flags & RXRPC_LAST_PACKET) {
- if (call->conn->out_clientflag)
- force = 1;
- }
- else if (!(hdr->flags & RXRPC_MORE_PACKETS)) {
- do_ACK = RXRPC_ACK_REQUESTED;
- }
-
- /* re-ACK packets previously received out-of-order */
- for (offset++; offset < RXRPC_CALL_ACK_WINDOW_SIZE; offset++)
- if (call->ackr_array[offset] != RXRPC_ACK_TYPE_ACK)
- break;
-
- call->ackr_pend_cnt = offset;
-
- /* generate an ACK if we fill up the window */
- if (call->ackr_pend_cnt >= RXRPC_CALL_ACK_WINDOW_SIZE)
- force = 1;
-
- gen_ACK:
- _debug("%05lu ACKs pend=%u norm=%s special=%s%s",
- jiffies - call->cjif,
- call->ackr_pend_cnt,
- rxrpc_acks[do_ACK],
- rxrpc_acks[special_ACK],
- force ? " immediate" :
- do_ACK == RXRPC_ACK_REQUESTED ? " merge-req" :
- hdr->flags & RXRPC_LAST_PACKET ? " finalise" :
- " defer"
- );
-
- /* send any pending normal ACKs if need be */
- if (call->ackr_pend_cnt > 0) {
- /* fill out the appropriate form */
- call->ackr.bufferSpace = htons(RXRPC_CALL_ACK_WINDOW_SIZE);
- call->ackr.maxSkew = htons(min(call->ackr_high_seq - seq,
- 65535U));
- call->ackr.firstPacket = htonl(call->ackr_win_bot);
- call->ackr.previousPacket = call->ackr_prev_seq;
- call->ackr.serial = hdr->serial;
- call->ackr.nAcks = call->ackr_pend_cnt;
-
- if (do_ACK == RXRPC_ACK_REQUESTED)
- call->ackr.reason = do_ACK;
-
- /* generate the ACK immediately if necessary */
- if (special_ACK || force) {
- err = __rxrpc_call_gen_normal_ACK(
- call, do_ACK == RXRPC_ACK_DELAY ? 0 : seq);
- if (err < 0) {
- ret = err;
- goto out;
- }
- }
- }
-
- if (call->ackr.reason == RXRPC_ACK_REQUESTED)
- call->ackr_dfr_seq = seq;
-
- /* start the ACK timer if not running if there are any pending deferred
- * ACKs */
- if (call->ackr_pend_cnt > 0 &&
- call->ackr.reason != RXRPC_ACK_REQUESTED &&
- !timer_pending(&call->ackr_dfr_timo)
- ) {
- unsigned long timo;
-
- timo = rxrpc_call_dfr_ack_timeout + jiffies;
-
- _debug("START ACKR TIMER for cj=%lu", timo - call->cjif);
-
- spin_lock(&call->lock);
- mod_timer(&call->ackr_dfr_timo, timo);
- spin_unlock(&call->lock);
- }
- else if ((call->ackr_pend_cnt == 0 ||
- call->ackr.reason == RXRPC_ACK_REQUESTED) &&
- timer_pending(&call->ackr_dfr_timo)
- ) {
- /* stop timer if no pending ACKs */
- _debug("CLEAR ACKR TIMER");
- del_timer_sync(&call->ackr_dfr_timo);
- }
-
- /* send a special ACK if one is required */
- if (special_ACK) {
- struct rxrpc_ackpacket ack;
- struct kvec diov[2];
- uint8_t acks[1] = { RXRPC_ACK_TYPE_ACK };
-
- /* fill out the appropriate form */
- ack.bufferSpace = htons(RXRPC_CALL_ACK_WINDOW_SIZE);
- ack.maxSkew = htons(min(call->ackr_high_seq - seq,
- 65535U));
- ack.firstPacket = htonl(call->ackr_win_bot);
- ack.previousPacket = call->ackr_prev_seq;
- ack.serial = hdr->serial;
- ack.reason = special_ACK;
- ack.nAcks = 0;
-
- _proto("Rx Sending s-ACK"
- " { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
- ntohs(ack.maxSkew),
- ntohl(ack.firstPacket),
- ntohl(ack.previousPacket),
- ntohl(ack.serial),
- rxrpc_acks[ack.reason],
- ack.nAcks);
-
- diov[0].iov_len = sizeof(struct rxrpc_ackpacket);
- diov[0].iov_base = &ack;
- diov[1].iov_len = sizeof(acks);
- diov[1].iov_base = acks;
-
- /* build and send the message */
- err = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
- hdr->seq ? 2 : 1, diov,
- GFP_KERNEL,
- &msg);
- if (err < 0) {
- ret = err;
- goto out;
- }
-
- msg->seq = seq;
- msg->hdr.seq = htonl(seq);
- msg->hdr.flags |= RXRPC_SLOW_START_OK;
-
- err = rxrpc_conn_sendmsg(call->conn, msg);
- rxrpc_put_message(msg);
- if (err < 0) {
- ret = err;
- goto out;
- }
- call->pkt_snd_count++;
- }
-
- out:
- if (hdr->seq)
- call->ackr_prev_seq = hdr->seq;
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_call_generate_ACK() */
-
-/*****************************************************************************/
-/*
- * handle work to be done on a call
- * - includes packet reception and timeout processing
- */
-void rxrpc_call_do_stuff(struct rxrpc_call *call)
-{
- _enter("%p{flags=%lx}", call, call->flags);
-
- /* handle packet reception */
- if (call->flags & RXRPC_CALL_RCV_PKT) {
- _debug("- receive packet");
- call->flags &= ~RXRPC_CALL_RCV_PKT;
- rxrpc_call_receive_packet(call);
- }
-
- /* handle overdue ACKs */
- if (call->flags & RXRPC_CALL_ACKS_TIMO) {
- _debug("- overdue ACK timeout");
- call->flags &= ~RXRPC_CALL_ACKS_TIMO;
- rxrpc_call_resend(call, call->snd_seq_count);
- }
-
- /* handle lack of reception */
- if (call->flags & RXRPC_CALL_RCV_TIMO) {
- _debug("- reception timeout");
- call->flags &= ~RXRPC_CALL_RCV_TIMO;
- rxrpc_call_abort(call, -EIO);
- }
-
- /* handle deferred ACKs */
- if (call->flags & RXRPC_CALL_ACKR_TIMO ||
- (call->ackr.nAcks > 0 && call->ackr.reason == RXRPC_ACK_REQUESTED)
- ) {
- _debug("- deferred ACK timeout: cj=%05lu r=%s n=%u",
- jiffies - call->cjif,
- rxrpc_acks[call->ackr.reason],
- call->ackr.nAcks);
-
- call->flags &= ~RXRPC_CALL_ACKR_TIMO;
-
- if (call->ackr.nAcks > 0 &&
- call->app_call_state != RXRPC_CSTATE_ERROR) {
- /* generate ACK */
- __rxrpc_call_gen_normal_ACK(call, call->ackr_dfr_seq);
- call->ackr_dfr_seq = 0;
- }
- }
-
- _leave("");
-
-} /* end rxrpc_call_do_stuff() */
-
-/*****************************************************************************/
-/*
- * send an abort message at call or connection level
- * - must be called with call->lock held
- * - the supplied error code is sent as the packet data
- */
-static int __rxrpc_call_abort(struct rxrpc_call *call, int errno)
-{
- struct rxrpc_connection *conn = call->conn;
- struct rxrpc_message *msg;
- struct kvec diov[1];
- int ret;
- __be32 _error;
-
- _enter("%p{%08x},%p{%d},%d",
- conn, ntohl(conn->conn_id), call, ntohl(call->call_id), errno);
-
- /* if this call is already aborted, then just wake up any waiters */
- if (call->app_call_state == RXRPC_CSTATE_ERROR) {
- spin_unlock(&call->lock);
- call->app_error_func(call);
- _leave(" = 0");
- return 0;
- }
-
- rxrpc_get_call(call);
-
- /* change the state _with_ the lock still held */
- call->app_call_state = RXRPC_CSTATE_ERROR;
- call->app_err_state = RXRPC_ESTATE_LOCAL_ABORT;
- call->app_errno = errno;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
- call->app_async_read = 0;
-
- _state(call);
-
- /* ask the app to translate the error code */
- call->app_aemap_func(call);
-
- spin_unlock(&call->lock);
-
- /* flush any outstanding ACKs */
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
-
- if (rxrpc_call_is_ack_pending(call))
- __rxrpc_call_gen_normal_ACK(call, 0);
-
- /* send the abort packet only if we actually traded some other
- * packets */
- ret = 0;
- if (call->pkt_snd_count || call->pkt_rcv_count) {
- /* actually send the abort */
- _proto("Rx Sending Call ABORT { data=%d }",
- call->app_abort_code);
-
- _error = htonl(call->app_abort_code);
-
- diov[0].iov_len = sizeof(_error);
- diov[0].iov_base = &_error;
-
- ret = rxrpc_conn_newmsg(conn, call, RXRPC_PACKET_TYPE_ABORT,
- 1, diov, GFP_KERNEL, &msg);
- if (ret == 0) {
- ret = rxrpc_conn_sendmsg(conn, msg);
- rxrpc_put_message(msg);
- }
- }
-
- /* tell the app layer to let go */
- call->app_error_func(call);
-
- rxrpc_put_call(call);
-
- _leave(" = %d", ret);
- return ret;
-} /* end __rxrpc_call_abort() */
-
-/*****************************************************************************/
-/*
- * send an abort message at call or connection level
- * - the supplied error code is sent as the packet data
- */
-int rxrpc_call_abort(struct rxrpc_call *call, int error)
-{
- spin_lock(&call->lock);
-
- return __rxrpc_call_abort(call, error);
-
-} /* end rxrpc_call_abort() */
-
-/*****************************************************************************/
-/*
- * process packets waiting for this call
- */
-static void rxrpc_call_receive_packet(struct rxrpc_call *call)
-{
- struct rxrpc_message *msg;
- struct list_head *_p;
-
- _enter("%p", call);
-
- rxrpc_get_call(call); /* must not go away too soon if aborted by
- * app-layer */
-
- while (!list_empty(&call->rcv_receiveq)) {
- /* try to get next packet */
- _p = NULL;
- spin_lock(&call->lock);
- if (!list_empty(&call->rcv_receiveq)) {
- _p = call->rcv_receiveq.next;
- list_del_init(_p);
- }
- spin_unlock(&call->lock);
-
- if (!_p)
- break;
-
- msg = list_entry(_p, struct rxrpc_message, link);
-
- _proto("Rx %05lu Received %s packet (%%%u,#%u,%c%c%c%c%c)",
- jiffies - call->cjif,
- rxrpc_pkts[msg->hdr.type],
- ntohl(msg->hdr.serial),
- msg->seq,
- msg->hdr.flags & RXRPC_JUMBO_PACKET ? 'j' : '-',
- msg->hdr.flags & RXRPC_MORE_PACKETS ? 'm' : '-',
- msg->hdr.flags & RXRPC_LAST_PACKET ? 'l' : '-',
- msg->hdr.flags & RXRPC_REQUEST_ACK ? 'r' : '-',
- msg->hdr.flags & RXRPC_CLIENT_INITIATED ? 'C' : 'S'
- );
-
- switch (msg->hdr.type) {
- /* deal with data packets */
- case RXRPC_PACKET_TYPE_DATA:
- /* ACK the packet if necessary */
- switch (rxrpc_call_generate_ACK(call, &msg->hdr,
- NULL)) {
- case 0: /* useful packet */
- rxrpc_call_receive_data_packet(call, msg);
- break;
- case 1: /* duplicate or out-of-window packet */
- break;
- default:
- rxrpc_put_message(msg);
- goto out;
- }
- break;
-
- /* deal with ACK packets */
- case RXRPC_PACKET_TYPE_ACK:
- rxrpc_call_receive_ack_packet(call, msg);
- break;
-
- /* deal with abort packets */
- case RXRPC_PACKET_TYPE_ABORT: {
- __be32 _dbuf, *dp;
-
- dp = skb_header_pointer(msg->pkt, msg->offset,
- sizeof(_dbuf), &_dbuf);
- if (dp == NULL)
- printk("Rx Received short ABORT packet\n");
-
- _proto("Rx Received Call ABORT { data=%d }",
- (dp ? ntohl(*dp) : 0));
-
- spin_lock(&call->lock);
- call->app_call_state = RXRPC_CSTATE_ERROR;
- call->app_err_state = RXRPC_ESTATE_PEER_ABORT;
- call->app_abort_code = (dp ? ntohl(*dp) : 0);
- call->app_errno = -ECONNABORTED;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
- call->app_async_read = 0;
-
- /* ask the app to translate the error code */
- call->app_aemap_func(call);
- _state(call);
- spin_unlock(&call->lock);
- call->app_error_func(call);
- break;
- }
- default:
- /* deal with other packet types */
- _proto("Rx Unsupported packet type %u (#%u)",
- msg->hdr.type, msg->seq);
- break;
- }
-
- rxrpc_put_message(msg);
- }
-
- out:
- rxrpc_put_call(call);
- _leave("");
-} /* end rxrpc_call_receive_packet() */
-
-/*****************************************************************************/
-/*
- * process next data packet
- * - as the next data packet arrives:
- * - it is queued on app_readyq _if_ it is the next one expected
- * (app_ready_seq+1)
- * - it is queued on app_unreadyq _if_ it is not the next one expected
- * - if a packet placed on app_readyq completely fills a hole leading up to
- * the first packet on app_unreadyq, then packets now in sequence are
- * tranferred to app_readyq
- * - the application layer can only see packets on app_readyq
- * (app_ready_qty bytes)
- * - the application layer is prodded every time a new packet arrives
- */
-static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
- struct rxrpc_message *msg)
-{
- const struct rxrpc_operation *optbl, *op;
- struct rxrpc_message *pmsg;
- struct list_head *_p;
- int ret, lo, hi, rmtimo;
- __be32 opid;
-
- _enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
-
- rxrpc_get_message(msg);
-
- /* add to the unready queue if we'd have to create a hole in the ready
- * queue otherwise */
- if (msg->seq != call->app_ready_seq + 1) {
- _debug("Call add packet %d to unreadyq", msg->seq);
-
- /* insert in seq order */
- list_for_each(_p, &call->app_unreadyq) {
- pmsg = list_entry(_p, struct rxrpc_message, link);
- if (pmsg->seq > msg->seq)
- break;
- }
-
- list_add_tail(&msg->link, _p);
-
- _leave(" [unreadyq]");
- return;
- }
-
- /* next in sequence - simply append into the call's ready queue */
- _debug("Call add packet %d to readyq (+%Zd => %Zd bytes)",
- msg->seq, msg->dsize, call->app_ready_qty);
-
- spin_lock(&call->lock);
- call->app_ready_seq = msg->seq;
- call->app_ready_qty += msg->dsize;
- list_add_tail(&msg->link, &call->app_readyq);
-
- /* move unready packets to the readyq if we got rid of a hole */
- while (!list_empty(&call->app_unreadyq)) {
- pmsg = list_entry(call->app_unreadyq.next,
- struct rxrpc_message, link);
-
- if (pmsg->seq != call->app_ready_seq + 1)
- break;
-
- /* next in sequence - just move list-to-list */
- _debug("Call transfer packet %d to readyq (+%Zd => %Zd bytes)",
- pmsg->seq, pmsg->dsize, call->app_ready_qty);
-
- call->app_ready_seq = pmsg->seq;
- call->app_ready_qty += pmsg->dsize;
- list_move_tail(&pmsg->link, &call->app_readyq);
- }
-
- /* see if we've got the last packet yet */
- if (!list_empty(&call->app_readyq)) {
- pmsg = list_entry(call->app_readyq.prev,
- struct rxrpc_message, link);
- if (pmsg->hdr.flags & RXRPC_LAST_PACKET) {
- call->app_last_rcv = 1;
- _debug("Last packet on readyq");
- }
- }
-
- switch (call->app_call_state) {
- /* do nothing if call already aborted */
- case RXRPC_CSTATE_ERROR:
- spin_unlock(&call->lock);
- _leave(" [error]");
- return;
-
- /* extract the operation ID from an incoming call if that's not
- * yet been done */
- case RXRPC_CSTATE_SRVR_RCV_OPID:
- spin_unlock(&call->lock);
-
- /* handle as yet insufficient data for the operation ID */
- if (call->app_ready_qty < 4) {
- if (call->app_last_rcv)
- /* trouble - last packet seen */
- rxrpc_call_abort(call, -EINVAL);
-
- _leave("");
- return;
- }
-
- /* pull the operation ID out of the buffer */
- ret = rxrpc_call_read_data(call, &opid, sizeof(opid), 0);
- if (ret < 0) {
- printk("Unexpected error from read-data: %d\n", ret);
- if (call->app_call_state != RXRPC_CSTATE_ERROR)
- rxrpc_call_abort(call, ret);
- _leave("");
- return;
- }
- call->app_opcode = ntohl(opid);
-
- /* locate the operation in the available ops table */
- optbl = call->conn->service->ops_begin;
- lo = 0;
- hi = call->conn->service->ops_end - optbl;
-
- while (lo < hi) {
- int mid = (hi + lo) / 2;
- op = &optbl[mid];
- if (call->app_opcode == op->id)
- goto found_op;
- if (call->app_opcode > op->id)
- lo = mid + 1;
- else
- hi = mid;
- }
-
- /* search failed */
- kproto("Rx Client requested operation %d from %s service",
- call->app_opcode, call->conn->service->name);
- rxrpc_call_abort(call, -EINVAL);
- _leave(" [inval]");
- return;
-
- found_op:
- _proto("Rx Client requested operation %s from %s service",
- op->name, call->conn->service->name);
-
- /* we're now waiting for the argument block (unless the call
- * was aborted) */
- spin_lock(&call->lock);
- if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_OPID ||
- call->app_call_state == RXRPC_CSTATE_SRVR_SND_REPLY) {
- if (!call->app_last_rcv)
- call->app_call_state =
- RXRPC_CSTATE_SRVR_RCV_ARGS;
- else if (call->app_ready_qty > 0)
- call->app_call_state =
- RXRPC_CSTATE_SRVR_GOT_ARGS;
- else
- call->app_call_state =
- RXRPC_CSTATE_SRVR_SND_REPLY;
- call->app_mark = op->asize;
- call->app_user = op->user;
- }
- spin_unlock(&call->lock);
-
- _state(call);
- break;
-
- case RXRPC_CSTATE_SRVR_RCV_ARGS:
- /* change state if just received last packet of arg block */
- if (call->app_last_rcv)
- call->app_call_state = RXRPC_CSTATE_SRVR_GOT_ARGS;
- spin_unlock(&call->lock);
-
- _state(call);
- break;
-
- case RXRPC_CSTATE_CLNT_RCV_REPLY:
- /* change state if just received last packet of reply block */
- rmtimo = 0;
- if (call->app_last_rcv) {
- call->app_call_state = RXRPC_CSTATE_CLNT_GOT_REPLY;
- rmtimo = 1;
- }
- spin_unlock(&call->lock);
-
- if (rmtimo) {
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
- }
-
- _state(call);
- break;
-
- default:
- /* deal with data reception in an unexpected state */
- printk("Unexpected state [[[ %u ]]]\n", call->app_call_state);
- __rxrpc_call_abort(call, -EBADMSG);
- _leave("");
- return;
- }
-
- if (call->app_call_state == RXRPC_CSTATE_CLNT_RCV_REPLY &&
- call->app_last_rcv)
- BUG();
-
- /* otherwise just invoke the data function whenever we can satisfy its desire for more
- * data
- */
- _proto("Rx Received Op Data: st=%u qty=%Zu mk=%Zu%s",
- call->app_call_state, call->app_ready_qty, call->app_mark,
- call->app_last_rcv ? " last-rcvd" : "");
-
- spin_lock(&call->lock);
-
- ret = __rxrpc_call_read_data(call);
- switch (ret) {
- case 0:
- spin_unlock(&call->lock);
- call->app_attn_func(call);
- break;
- case -EAGAIN:
- spin_unlock(&call->lock);
- break;
- case -ECONNABORTED:
- spin_unlock(&call->lock);
- break;
- default:
- __rxrpc_call_abort(call, ret);
- break;
- }
-
- _state(call);
-
- _leave("");
-
-} /* end rxrpc_call_receive_data_packet() */
-
-/*****************************************************************************/
-/*
- * received an ACK packet
- */
-static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
- struct rxrpc_message *msg)
-{
- struct rxrpc_ackpacket _ack, *ap;
- rxrpc_serial_net_t serial;
- rxrpc_seq_t seq;
- int ret;
-
- _enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
-
- /* extract the basic ACK record */
- ap = skb_header_pointer(msg->pkt, msg->offset, sizeof(_ack), &_ack);
- if (ap == NULL) {
- printk("Rx Received short ACK packet\n");
- return;
- }
- msg->offset += sizeof(_ack);
-
- serial = ap->serial;
- seq = ntohl(ap->firstPacket);
-
- _proto("Rx Received ACK %%%d { b=%hu m=%hu f=%u p=%u s=%u r=%s n=%u }",
- ntohl(msg->hdr.serial),
- ntohs(ap->bufferSpace),
- ntohs(ap->maxSkew),
- seq,
- ntohl(ap->previousPacket),
- ntohl(serial),
- rxrpc_acks[ap->reason],
- call->ackr.nAcks
- );
-
- /* check the other side isn't ACK'ing a sequence number I haven't sent
- * yet */
- if (ap->nAcks > 0 &&
- (seq > call->snd_seq_count ||
- seq + ap->nAcks - 1 > call->snd_seq_count)) {
- printk("Received ACK (#%u-#%u) for unsent packet\n",
- seq, seq + ap->nAcks - 1);
- rxrpc_call_abort(call, -EINVAL);
- _leave("");
- return;
- }
-
- /* deal with RTT calculation */
- if (serial) {
- struct rxrpc_message *rttmsg;
-
- /* find the prompting packet */
- spin_lock(&call->lock);
- if (call->snd_ping && call->snd_ping->hdr.serial == serial) {
- /* it was a ping packet */
- rttmsg = call->snd_ping;
- call->snd_ping = NULL;
- spin_unlock(&call->lock);
-
- if (rttmsg) {
- rttmsg->rttdone = 1;
- rxrpc_peer_calculate_rtt(call->conn->peer,
- rttmsg, msg);
- rxrpc_put_message(rttmsg);
- }
- }
- else {
- struct list_head *_p;
-
- /* it ought to be a data packet - look in the pending
- * ACK list */
- list_for_each(_p, &call->acks_pendq) {
- rttmsg = list_entry(_p, struct rxrpc_message,
- link);
- if (rttmsg->hdr.serial == serial) {
- if (rttmsg->rttdone)
- /* never do RTT twice without
- * resending */
- break;
-
- rttmsg->rttdone = 1;
- rxrpc_peer_calculate_rtt(
- call->conn->peer, rttmsg, msg);
- break;
- }
- }
- spin_unlock(&call->lock);
- }
- }
-
- switch (ap->reason) {
- /* deal with negative/positive acknowledgement of data
- * packets */
- case RXRPC_ACK_REQUESTED:
- case RXRPC_ACK_DELAY:
- case RXRPC_ACK_IDLE:
- rxrpc_call_definitively_ACK(call, seq - 1);
-
- case RXRPC_ACK_DUPLICATE:
- case RXRPC_ACK_OUT_OF_SEQUENCE:
- case RXRPC_ACK_EXCEEDS_WINDOW:
- call->snd_resend_cnt = 0;
- ret = rxrpc_call_record_ACK(call, msg, seq, ap->nAcks);
- if (ret < 0)
- rxrpc_call_abort(call, ret);
- break;
-
- /* respond to ping packets immediately */
- case RXRPC_ACK_PING:
- rxrpc_call_generate_ACK(call, &msg->hdr, ap);
- break;
-
- /* only record RTT on ping response packets */
- case RXRPC_ACK_PING_RESPONSE:
- if (call->snd_ping) {
- struct rxrpc_message *rttmsg;
-
- /* only do RTT stuff if the response matches the
- * retained ping */
- rttmsg = NULL;
- spin_lock(&call->lock);
- if (call->snd_ping &&
- call->snd_ping->hdr.serial == ap->serial) {
- rttmsg = call->snd_ping;
- call->snd_ping = NULL;
- }
- spin_unlock(&call->lock);
-
- if (rttmsg) {
- rttmsg->rttdone = 1;
- rxrpc_peer_calculate_rtt(call->conn->peer,
- rttmsg, msg);
- rxrpc_put_message(rttmsg);
- }
- }
- break;
-
- default:
- printk("Unsupported ACK reason %u\n", ap->reason);
- break;
- }
-
- _leave("");
-} /* end rxrpc_call_receive_ack_packet() */
-
-/*****************************************************************************/
-/*
- * record definitive ACKs for all messages up to and including the one with the
- * 'highest' seq
- */
-static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
- rxrpc_seq_t highest)
-{
- struct rxrpc_message *msg;
- int now_complete;
-
- _enter("%p{ads=%u},%u", call, call->acks_dftv_seq, highest);
-
- while (call->acks_dftv_seq < highest) {
- call->acks_dftv_seq++;
-
- _proto("Definitive ACK on packet #%u", call->acks_dftv_seq);
-
- /* discard those at front of queue until message with highest
- * ACK is found */
- spin_lock(&call->lock);
- msg = NULL;
- if (!list_empty(&call->acks_pendq)) {
- msg = list_entry(call->acks_pendq.next,
- struct rxrpc_message, link);
- list_del_init(&msg->link); /* dequeue */
- if (msg->state == RXRPC_MSG_SENT)
- call->acks_pend_cnt--;
- }
- spin_unlock(&call->lock);
-
- /* insanity check */
- if (!msg)
- panic("%s(): acks_pendq unexpectedly empty\n",
- __FUNCTION__);
-
- if (msg->seq != call->acks_dftv_seq)
- panic("%s(): Packet #%u expected at front of acks_pendq"
- " (#%u found)\n",
- __FUNCTION__, call->acks_dftv_seq, msg->seq);
-
- /* discard the message */
- msg->state = RXRPC_MSG_DONE;
- rxrpc_put_message(msg);
- }
-
- /* if all sent packets are definitively ACK'd then prod any sleepers just in case */
- now_complete = 0;
- spin_lock(&call->lock);
- if (call->acks_dftv_seq == call->snd_seq_count) {
- if (call->app_call_state != RXRPC_CSTATE_COMPLETE) {
- call->app_call_state = RXRPC_CSTATE_COMPLETE;
- _state(call);
- now_complete = 1;
- }
- }
- spin_unlock(&call->lock);
-
- if (now_complete) {
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
- call->app_attn_func(call);
- }
-
- _leave("");
-} /* end rxrpc_call_definitively_ACK() */
-
-/*****************************************************************************/
-/*
- * record the specified amount of ACKs/NAKs
- */
-static int rxrpc_call_record_ACK(struct rxrpc_call *call,
- struct rxrpc_message *msg,
- rxrpc_seq_t seq,
- size_t count)
-{
- struct rxrpc_message *dmsg;
- struct list_head *_p;
- rxrpc_seq_t highest;
- unsigned ix;
- size_t chunk;
- char resend, now_complete;
- u8 acks[16];
-
- _enter("%p{apc=%u ads=%u},%p,%u,%Zu",
- call, call->acks_pend_cnt, call->acks_dftv_seq,
- msg, seq, count);
-
- /* handle re-ACK'ing of definitively ACK'd packets (may be out-of-order
- * ACKs) */
- if (seq <= call->acks_dftv_seq) {
- unsigned delta = call->acks_dftv_seq - seq;
-
- if (count <= delta) {
- _leave(" = 0 [all definitively ACK'd]");
- return 0;
- }
-
- seq += delta;
- count -= delta;
- msg->offset += delta;
- }
-
- highest = seq + count - 1;
- resend = 0;
- while (count > 0) {
- /* extract up to 16 ACK slots at a time */
- chunk = min(count, sizeof(acks));
- count -= chunk;
-
- memset(acks, 2, sizeof(acks));
-
- if (skb_copy_bits(msg->pkt, msg->offset, &acks, chunk) < 0) {
- printk("Rx Received short ACK packet\n");
- _leave(" = -EINVAL");
- return -EINVAL;
- }
- msg->offset += chunk;
-
- /* check that the ACK set is valid */
- for (ix = 0; ix < chunk; ix++) {
- switch (acks[ix]) {
- case RXRPC_ACK_TYPE_ACK:
- break;
- case RXRPC_ACK_TYPE_NACK:
- resend = 1;
- break;
- default:
- printk("Rx Received unsupported ACK state"
- " %u\n", acks[ix]);
- _leave(" = -EINVAL");
- return -EINVAL;
- }
- }
-
- _proto("Rx ACK of packets #%u-#%u "
- "[%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c] (pend=%u)",
- seq, (unsigned) (seq + chunk - 1),
- _acktype[acks[0x0]],
- _acktype[acks[0x1]],
- _acktype[acks[0x2]],
- _acktype[acks[0x3]],
- _acktype[acks[0x4]],
- _acktype[acks[0x5]],
- _acktype[acks[0x6]],
- _acktype[acks[0x7]],
- _acktype[acks[0x8]],
- _acktype[acks[0x9]],
- _acktype[acks[0xA]],
- _acktype[acks[0xB]],
- _acktype[acks[0xC]],
- _acktype[acks[0xD]],
- _acktype[acks[0xE]],
- _acktype[acks[0xF]],
- call->acks_pend_cnt
- );
-
- /* mark the packets in the ACK queue as being provisionally
- * ACK'd */
- ix = 0;
- spin_lock(&call->lock);
-
- /* find the first packet ACK'd/NAK'd here */
- list_for_each(_p, &call->acks_pendq) {
- dmsg = list_entry(_p, struct rxrpc_message, link);
- if (dmsg->seq == seq)
- goto found_first;
- _debug("- %u: skipping #%u", ix, dmsg->seq);
- }
- goto bad_queue;
-
- found_first:
- do {
- _debug("- %u: processing #%u (%c) apc=%u",
- ix, dmsg->seq, _acktype[acks[ix]],
- call->acks_pend_cnt);
-
- if (acks[ix] == RXRPC_ACK_TYPE_ACK) {
- if (dmsg->state == RXRPC_MSG_SENT)
- call->acks_pend_cnt--;
- dmsg->state = RXRPC_MSG_ACKED;
- }
- else {
- if (dmsg->state == RXRPC_MSG_ACKED)
- call->acks_pend_cnt++;
- dmsg->state = RXRPC_MSG_SENT;
- }
- ix++;
- seq++;
-
- _p = dmsg->link.next;
- dmsg = list_entry(_p, struct rxrpc_message, link);
- } while(ix < chunk &&
- _p != &call->acks_pendq &&
- dmsg->seq == seq);
-
- if (ix < chunk)
- goto bad_queue;
-
- spin_unlock(&call->lock);
- }
-
- if (resend)
- rxrpc_call_resend(call, highest);
-
- /* if all packets are provisionally ACK'd, then wake up anyone who's
- * waiting for that */
- now_complete = 0;
- spin_lock(&call->lock);
- if (call->acks_pend_cnt == 0) {
- if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_FINAL_ACK) {
- call->app_call_state = RXRPC_CSTATE_COMPLETE;
- _state(call);
- }
- now_complete = 1;
- }
- spin_unlock(&call->lock);
-
- if (now_complete) {
- _debug("- wake up waiters");
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
- call->app_attn_func(call);
- }
-
- _leave(" = 0 (apc=%u)", call->acks_pend_cnt);
- return 0;
-
- bad_queue:
- panic("%s(): acks_pendq in bad state (packet #%u absent)\n",
- __FUNCTION__, seq);
-
-} /* end rxrpc_call_record_ACK() */
-
-/*****************************************************************************/
-/*
- * transfer data from the ready packet queue to the asynchronous read buffer
- * - since this func is the only one going to look at packets queued on
- * app_readyq, we don't need a lock to modify or access them, only to modify
- * the queue pointers
- * - called with call->lock held
- * - the buffer must be in kernel space
- * - returns:
- * 0 if buffer filled
- * -EAGAIN if buffer not filled and more data to come
- * -EBADMSG if last packet received and insufficient data left
- * -ECONNABORTED if the call has in an error state
- */
-static int __rxrpc_call_read_data(struct rxrpc_call *call)
-{
- struct rxrpc_message *msg;
- size_t qty;
- int ret;
-
- _enter("%p{as=%d buf=%p qty=%Zu/%Zu}",
- call,
- call->app_async_read, call->app_read_buf,
- call->app_ready_qty, call->app_mark);
-
- /* check the state */
- switch (call->app_call_state) {
- case RXRPC_CSTATE_SRVR_RCV_ARGS:
- case RXRPC_CSTATE_CLNT_RCV_REPLY:
- if (call->app_last_rcv) {
- printk("%s(%p,%p,%Zd):"
- " Inconsistent call state (%s, last pkt)",
- __FUNCTION__,
- call, call->app_read_buf, call->app_mark,
- rxrpc_call_states[call->app_call_state]);
- BUG();
- }
- break;
-
- case RXRPC_CSTATE_SRVR_RCV_OPID:
- case RXRPC_CSTATE_SRVR_GOT_ARGS:
- case RXRPC_CSTATE_CLNT_GOT_REPLY:
- break;
-
- case RXRPC_CSTATE_SRVR_SND_REPLY:
- if (!call->app_last_rcv) {
- printk("%s(%p,%p,%Zd):"
- " Inconsistent call state (%s, not last pkt)",
- __FUNCTION__,
- call, call->app_read_buf, call->app_mark,
- rxrpc_call_states[call->app_call_state]);
- BUG();
- }
- _debug("Trying to read data from call in SND_REPLY state");
- break;
-
- case RXRPC_CSTATE_ERROR:
- _leave(" = -ECONNABORTED");
- return -ECONNABORTED;
-
- default:
- printk("reading in unexpected state [[[ %u ]]]\n",
- call->app_call_state);
- BUG();
- }
-
- /* handle the case of not having an async buffer */
- if (!call->app_async_read) {
- if (call->app_mark == RXRPC_APP_MARK_EOF) {
- ret = call->app_last_rcv ? 0 : -EAGAIN;
- }
- else {
- if (call->app_mark >= call->app_ready_qty) {
- call->app_mark = RXRPC_APP_MARK_EOF;
- ret = 0;
- }
- else {
- ret = call->app_last_rcv ? -EBADMSG : -EAGAIN;
- }
- }
-
- _leave(" = %d [no buf]", ret);
- return 0;
- }
-
- while (!list_empty(&call->app_readyq) && call->app_mark > 0) {
- msg = list_entry(call->app_readyq.next,
- struct rxrpc_message, link);
-
- /* drag as much data as we need out of this packet */
- qty = min(call->app_mark, msg->dsize);
-
- _debug("reading %Zu from skb=%p off=%lu",
- qty, msg->pkt, msg->offset);
-
- if (call->app_read_buf)
- if (skb_copy_bits(msg->pkt, msg->offset,
- call->app_read_buf, qty) < 0)
- panic("%s: Failed to copy data from packet:"
- " (%p,%p,%Zd)",
- __FUNCTION__,
- call, call->app_read_buf, qty);
-
- /* if that packet is now empty, discard it */
- call->app_ready_qty -= qty;
- msg->dsize -= qty;
-
- if (msg->dsize == 0) {
- list_del_init(&msg->link);
- rxrpc_put_message(msg);
- }
- else {
- msg->offset += qty;
- }
-
- call->app_mark -= qty;
- if (call->app_read_buf)
- call->app_read_buf += qty;
- }
-
- if (call->app_mark == 0) {
- call->app_async_read = 0;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
-
- /* adjust the state if used up all packets */
- if (list_empty(&call->app_readyq) && call->app_last_rcv) {
- switch (call->app_call_state) {
- case RXRPC_CSTATE_SRVR_RCV_OPID:
- call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
- call->app_mark = RXRPC_APP_MARK_EOF;
- _state(call);
- del_timer_sync(&call->rcv_timeout);
- break;
- case RXRPC_CSTATE_SRVR_GOT_ARGS:
- call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
- _state(call);
- del_timer_sync(&call->rcv_timeout);
- break;
- default:
- call->app_call_state = RXRPC_CSTATE_COMPLETE;
- _state(call);
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
- del_timer_sync(&call->rcv_timeout);
- break;
- }
- }
-
- _leave(" = 0");
- return 0;
- }
-
- if (call->app_last_rcv) {
- _debug("Insufficient data (%Zu/%Zu)",
- call->app_ready_qty, call->app_mark);
- call->app_async_read = 0;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
-
- _leave(" = -EBADMSG");
- return -EBADMSG;
- }
-
- _leave(" = -EAGAIN");
- return -EAGAIN;
-} /* end __rxrpc_call_read_data() */
-
-/*****************************************************************************/
-/*
- * attempt to read the specified amount of data from the call's ready queue
- * into the buffer provided
- * - since this func is the only one going to look at packets queued on
- * app_readyq, we don't need a lock to modify or access them, only to modify
- * the queue pointers
- * - if the buffer pointer is NULL, then data is merely drained, not copied
- * - if flags&RXRPC_CALL_READ_BLOCK, then the function will wait until there is
- * enough data or an error will be generated
- * - note that the caller must have added the calling task to the call's wait
- * queue beforehand
- * - if flags&RXRPC_CALL_READ_ALL, then an error will be generated if this
- * function doesn't read all available data
- */
-int rxrpc_call_read_data(struct rxrpc_call *call,
- void *buffer, size_t size, int flags)
-{
- int ret;
-
- _enter("%p{arq=%Zu},%p,%Zd,%x",
- call, call->app_ready_qty, buffer, size, flags);
-
- spin_lock(&call->lock);
-
- if (unlikely(!!call->app_read_buf)) {
- spin_unlock(&call->lock);
- _leave(" = -EBUSY");
- return -EBUSY;
- }
-
- call->app_mark = size;
- call->app_read_buf = buffer;
- call->app_async_read = 1;
- call->app_read_count++;
-
- /* read as much data as possible */
- ret = __rxrpc_call_read_data(call);
- switch (ret) {
- case 0:
- if (flags & RXRPC_CALL_READ_ALL &&
- (!call->app_last_rcv || call->app_ready_qty > 0)) {
- _leave(" = -EBADMSG");
- __rxrpc_call_abort(call, -EBADMSG);
- return -EBADMSG;
- }
-
- spin_unlock(&call->lock);
- call->app_attn_func(call);
- _leave(" = 0");
- return ret;
-
- case -ECONNABORTED:
- spin_unlock(&call->lock);
- _leave(" = %d [aborted]", ret);
- return ret;
-
- default:
- __rxrpc_call_abort(call, ret);
- _leave(" = %d", ret);
- return ret;
-
- case -EAGAIN:
- spin_unlock(&call->lock);
-
- if (!(flags & RXRPC_CALL_READ_BLOCK)) {
- _leave(" = -EAGAIN");
- return -EAGAIN;
- }
-
- /* wait for the data to arrive */
- _debug("blocking for data arrival");
-
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (!call->app_async_read || signal_pending(current))
- break;
- schedule();
- }
- set_current_state(TASK_RUNNING);
-
- if (signal_pending(current)) {
- _leave(" = -EINTR");
- return -EINTR;
- }
-
- if (call->app_call_state == RXRPC_CSTATE_ERROR) {
- _leave(" = -ECONNABORTED");
- return -ECONNABORTED;
- }
-
- _leave(" = 0");
- return 0;
- }
-
-} /* end rxrpc_call_read_data() */
-
-/*****************************************************************************/
-/*
- * write data to a call
- * - the data may not be sent immediately if it doesn't fill a buffer
- * - if we can't queue all the data for buffering now, siov[] will have been
- * adjusted to take account of what has been sent
- */
-int rxrpc_call_write_data(struct rxrpc_call *call,
- size_t sioc,
- struct kvec *siov,
- u8 rxhdr_flags,
- gfp_t alloc_flags,
- int dup_data,
- size_t *size_sent)
-{
- struct rxrpc_message *msg;
- struct kvec *sptr;
- size_t space, size, chunk, tmp;
- char *buf;
- int ret;
-
- _enter("%p,%Zu,%p,%02x,%x,%d,%p",
- call, sioc, siov, rxhdr_flags, alloc_flags, dup_data,
- size_sent);
-
- *size_sent = 0;
- size = 0;
- ret = -EINVAL;
-
- /* can't send more if we've sent last packet from this end */
- switch (call->app_call_state) {
- case RXRPC_CSTATE_SRVR_SND_REPLY:
- case RXRPC_CSTATE_CLNT_SND_ARGS:
- break;
- case RXRPC_CSTATE_ERROR:
- ret = call->app_errno;
- default:
- goto out;
- }
-
- /* calculate how much data we've been given */
- sptr = siov;
- for (; sioc > 0; sptr++, sioc--) {
- if (!sptr->iov_len)
- continue;
-
- if (!sptr->iov_base)
- goto out;
-
- size += sptr->iov_len;
- }
-
- _debug("- size=%Zu mtu=%Zu", size, call->conn->mtu_size);
-
- do {
- /* make sure there's a message under construction */
- if (!call->snd_nextmsg) {
- /* no - allocate a message with no data yet attached */
- ret = rxrpc_conn_newmsg(call->conn, call,
- RXRPC_PACKET_TYPE_DATA,
- 0, NULL, alloc_flags,
- &call->snd_nextmsg);
- if (ret < 0)
- goto out;
- _debug("- allocated new message [ds=%Zu]",
- call->snd_nextmsg->dsize);
- }
-
- msg = call->snd_nextmsg;
- msg->hdr.flags |= rxhdr_flags;
-
- /* deal with zero-length terminal packet */
- if (size == 0) {
- if (rxhdr_flags & RXRPC_LAST_PACKET) {
- ret = rxrpc_call_flush(call);
- if (ret < 0)
- goto out;
- }
- break;
- }
-
- /* work out how much space current packet has available */
- space = call->conn->mtu_size - msg->dsize;
- chunk = min(space, size);
-
- _debug("- [before] space=%Zu chunk=%Zu", space, chunk);
-
- while (!siov->iov_len)
- siov++;
-
- /* if we are going to have to duplicate the data then coalesce
- * it too */
- if (dup_data) {
- /* don't allocate more that 1 page at a time */
- if (chunk > PAGE_SIZE)
- chunk = PAGE_SIZE;
-
- /* allocate a data buffer and attach to the message */
- buf = kmalloc(chunk, alloc_flags);
- if (unlikely(!buf)) {
- if (msg->dsize ==
- sizeof(struct rxrpc_header)) {
- /* discard an empty msg and wind back
- * the seq counter */
- rxrpc_put_message(msg);
- call->snd_nextmsg = NULL;
- call->snd_seq_count--;
- }
-
- ret = -ENOMEM;
- goto out;
- }
-
- tmp = msg->dcount++;
- set_bit(tmp, &msg->dfree);
- msg->data[tmp].iov_base = buf;
- msg->data[tmp].iov_len = chunk;
- msg->dsize += chunk;
- *size_sent += chunk;
- size -= chunk;
-
- /* load the buffer with data */
- while (chunk > 0) {
- tmp = min(chunk, siov->iov_len);
- memcpy(buf, siov->iov_base, tmp);
- buf += tmp;
- siov->iov_base += tmp;
- siov->iov_len -= tmp;
- if (!siov->iov_len)
- siov++;
- chunk -= tmp;
- }
- }
- else {
- /* we want to attach the supplied buffers directly */
- while (chunk > 0 &&
- msg->dcount < RXRPC_MSG_MAX_IOCS) {
- tmp = msg->dcount++;
- msg->data[tmp].iov_base = siov->iov_base;
- msg->data[tmp].iov_len = siov->iov_len;
- msg->dsize += siov->iov_len;
- *size_sent += siov->iov_len;
- size -= siov->iov_len;
- chunk -= siov->iov_len;
- siov++;
- }
- }
-
- _debug("- [loaded] chunk=%Zu size=%Zu", chunk, size);
-
- /* dispatch the message when full, final or requesting ACK */
- if (msg->dsize >= call->conn->mtu_size || rxhdr_flags) {
- ret = rxrpc_call_flush(call);
- if (ret < 0)
- goto out;
- }
-
- } while(size > 0);
-
- ret = 0;
- out:
- _leave(" = %d (%Zd queued, %Zd rem)", ret, *size_sent, size);
- return ret;
-
-} /* end rxrpc_call_write_data() */
-
-/*****************************************************************************/
-/*
- * flush outstanding packets to the network
- */
-static int rxrpc_call_flush(struct rxrpc_call *call)
-{
- struct rxrpc_message *msg;
- int ret = 0;
-
- _enter("%p", call);
-
- rxrpc_get_call(call);
-
- /* if there's a packet under construction, then dispatch it now */
- if (call->snd_nextmsg) {
- msg = call->snd_nextmsg;
- call->snd_nextmsg = NULL;
-
- if (msg->hdr.flags & RXRPC_LAST_PACKET) {
- msg->hdr.flags &= ~RXRPC_MORE_PACKETS;
- if (call->app_call_state != RXRPC_CSTATE_CLNT_SND_ARGS)
- msg->hdr.flags |= RXRPC_REQUEST_ACK;
- }
- else {
- msg->hdr.flags |= RXRPC_MORE_PACKETS;
- }
-
- _proto("Sending DATA message { ds=%Zu dc=%u df=%02lu }",
- msg->dsize, msg->dcount, msg->dfree);
-
- /* queue and adjust call state */
- spin_lock(&call->lock);
- list_add_tail(&msg->link, &call->acks_pendq);
-
- /* decide what to do depending on current state and if this is
- * the last packet */
- ret = -EINVAL;
- switch (call->app_call_state) {
- case RXRPC_CSTATE_SRVR_SND_REPLY:
- if (msg->hdr.flags & RXRPC_LAST_PACKET) {
- call->app_call_state =
- RXRPC_CSTATE_SRVR_RCV_FINAL_ACK;
- _state(call);
- }
- break;
-
- case RXRPC_CSTATE_CLNT_SND_ARGS:
- if (msg->hdr.flags & RXRPC_LAST_PACKET) {
- call->app_call_state =
- RXRPC_CSTATE_CLNT_RCV_REPLY;
- _state(call);
- }
- break;
-
- case RXRPC_CSTATE_ERROR:
- ret = call->app_errno;
- default:
- spin_unlock(&call->lock);
- goto out;
- }
-
- call->acks_pend_cnt++;
-
- mod_timer(&call->acks_timeout,
- __rxrpc_rtt_based_timeout(call,
- rxrpc_call_acks_timeout));
-
- spin_unlock(&call->lock);
-
- ret = rxrpc_conn_sendmsg(call->conn, msg);
- if (ret == 0)
- call->pkt_snd_count++;
- }
-
- out:
- rxrpc_put_call(call);
-
- _leave(" = %d", ret);
- return ret;
-
-} /* end rxrpc_call_flush() */
-
-/*****************************************************************************/
-/*
- * resend NAK'd or unacknowledged packets up to the highest one specified
- */
-static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest)
-{
- struct rxrpc_message *msg;
- struct list_head *_p;
- rxrpc_seq_t seq = 0;
-
- _enter("%p,%u", call, highest);
-
- _proto("Rx Resend required");
-
- /* handle too many resends */
- if (call->snd_resend_cnt >= rxrpc_call_max_resend) {
- _debug("Aborting due to too many resends (rcv=%d)",
- call->pkt_rcv_count);
- rxrpc_call_abort(call,
- call->pkt_rcv_count > 0 ? -EIO : -ETIMEDOUT);
- _leave("");
- return;
- }
-
- spin_lock(&call->lock);
- call->snd_resend_cnt++;
- for (;;) {
- /* determine which the next packet we might need to ACK is */
- if (seq <= call->acks_dftv_seq)
- seq = call->acks_dftv_seq;
- seq++;
-
- if (seq > highest)
- break;
-
- /* look for the packet in the pending-ACK queue */
- list_for_each(_p, &call->acks_pendq) {
- msg = list_entry(_p, struct rxrpc_message, link);
- if (msg->seq == seq)
- goto found_msg;
- }
-
- panic("%s(%p,%d):"
- " Inconsistent pending-ACK queue (ds=%u sc=%u sq=%u)\n",
- __FUNCTION__, call, highest,
- call->acks_dftv_seq, call->snd_seq_count, seq);
-
- found_msg:
- if (msg->state != RXRPC_MSG_SENT)
- continue; /* only un-ACK'd packets */
-
- rxrpc_get_message(msg);
- spin_unlock(&call->lock);
-
- /* send each message again (and ignore any errors we might
- * incur) */
- _proto("Resending DATA message { ds=%Zu dc=%u df=%02lu }",
- msg->dsize, msg->dcount, msg->dfree);
-
- if (rxrpc_conn_sendmsg(call->conn, msg) == 0)
- call->pkt_snd_count++;
-
- rxrpc_put_message(msg);
-
- spin_lock(&call->lock);
- }
-
- /* reset the timeout */
- mod_timer(&call->acks_timeout,
- __rxrpc_rtt_based_timeout(call, rxrpc_call_acks_timeout));
-
- spin_unlock(&call->lock);
-
- _leave("");
-} /* end rxrpc_call_resend() */
-
-/*****************************************************************************/
-/*
- * handle an ICMP error being applied to a call
- */
-void rxrpc_call_handle_error(struct rxrpc_call *call, int local, int errno)
-{
- _enter("%p{%u},%d", call, ntohl(call->call_id), errno);
-
- /* if this call is already aborted, then just wake up any waiters */
- if (call->app_call_state == RXRPC_CSTATE_ERROR) {
- call->app_error_func(call);
- }
- else {
- /* tell the app layer what happened */
- spin_lock(&call->lock);
- call->app_call_state = RXRPC_CSTATE_ERROR;
- _state(call);
- if (local)
- call->app_err_state = RXRPC_ESTATE_LOCAL_ERROR;
- else
- call->app_err_state = RXRPC_ESTATE_REMOTE_ERROR;
- call->app_errno = errno;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
- call->app_async_read = 0;
-
- /* map the error */
- call->app_aemap_func(call);
-
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
-
- spin_unlock(&call->lock);
-
- call->app_error_func(call);
- }
-
- _leave("");
-} /* end rxrpc_call_handle_error() */
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
deleted file mode 100644
index a7c929a9fdca..000000000000
--- a/net/rxrpc/connection.c
+++ /dev/null
@@ -1,777 +0,0 @@
-/* connection.c: Rx connection routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <net/sock.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_connection_count);
-
-LIST_HEAD(rxrpc_conns);
-DECLARE_RWSEM(rxrpc_conns_sem);
-unsigned long rxrpc_conn_timeout = 60 * 60;
-
-static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn);
-
-static void __rxrpc_conn_timeout(rxrpc_timer_t *timer)
-{
- struct rxrpc_connection *conn =
- list_entry(timer, struct rxrpc_connection, timeout);
-
- _debug("Rx CONN TIMEOUT [%p{u=%d}]", conn, atomic_read(&conn->usage));
-
- rxrpc_conn_do_timeout(conn);
-}
-
-static const struct rxrpc_timer_ops rxrpc_conn_timer_ops = {
- .timed_out = __rxrpc_conn_timeout,
-};
-
-/*****************************************************************************/
-/*
- * create a new connection record
- */
-static inline int __rxrpc_create_connection(struct rxrpc_peer *peer,
- struct rxrpc_connection **_conn)
-{
- struct rxrpc_connection *conn;
-
- _enter("%p",peer);
-
- /* allocate and initialise a connection record */
- conn = kzalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
- if (!conn) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&conn->usage, 1);
-
- INIT_LIST_HEAD(&conn->link);
- INIT_LIST_HEAD(&conn->id_link);
- init_waitqueue_head(&conn->chanwait);
- spin_lock_init(&conn->lock);
- rxrpc_timer_init(&conn->timeout, &rxrpc_conn_timer_ops);
-
- do_gettimeofday(&conn->atime);
- conn->mtu_size = 1024;
- conn->peer = peer;
- conn->trans = peer->trans;
-
- __RXACCT(atomic_inc(&rxrpc_connection_count));
- *_conn = conn;
- _leave(" = 0 (%p)", conn);
-
- return 0;
-} /* end __rxrpc_create_connection() */
-
-/*****************************************************************************/
-/*
- * create a new connection record for outgoing connections
- */
-int rxrpc_create_connection(struct rxrpc_transport *trans,
- __be16 port,
- __be32 addr,
- uint16_t service_id,
- void *security,
- struct rxrpc_connection **_conn)
-{
- struct rxrpc_connection *candidate, *conn;
- struct rxrpc_peer *peer;
- struct list_head *_p;
- __be32 connid;
- int ret;
-
- _enter("%p{%hu},%u,%hu", trans, trans->port, ntohs(port), service_id);
-
- /* get a peer record */
- ret = rxrpc_peer_lookup(trans, addr, &peer);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- /* allocate and initialise a connection record */
- ret = __rxrpc_create_connection(peer, &candidate);
- if (ret < 0) {
- rxrpc_put_peer(peer);
- _leave(" = %d", ret);
- return ret;
- }
-
- /* fill in the specific bits */
- candidate->addr.sin_family = AF_INET;
- candidate->addr.sin_port = port;
- candidate->addr.sin_addr.s_addr = addr;
-
- candidate->in_epoch = rxrpc_epoch;
- candidate->out_epoch = rxrpc_epoch;
- candidate->in_clientflag = 0;
- candidate->out_clientflag = RXRPC_CLIENT_INITIATED;
- candidate->service_id = htons(service_id);
-
- /* invent a unique connection ID */
- write_lock(&peer->conn_idlock);
-
- try_next_id:
- connid = htonl(peer->conn_idcounter & RXRPC_CIDMASK);
- peer->conn_idcounter += RXRPC_MAXCALLS;
-
- list_for_each(_p, &peer->conn_idlist) {
- conn = list_entry(_p, struct rxrpc_connection, id_link);
- if (connid == conn->conn_id)
- goto try_next_id;
- if (connid > conn->conn_id)
- break;
- }
-
- _debug("selected candidate conn ID %x.%u",
- ntohl(peer->addr.s_addr), ntohl(connid));
-
- candidate->conn_id = connid;
- list_add_tail(&candidate->id_link, _p);
-
- write_unlock(&peer->conn_idlock);
-
- /* attach to peer */
- candidate->peer = peer;
-
- write_lock(&peer->conn_lock);
-
- /* search the peer's transport graveyard list */
- spin_lock(&peer->conn_gylock);
- list_for_each(_p, &peer->conn_graveyard) {
- conn = list_entry(_p, struct rxrpc_connection, link);
- if (conn->addr.sin_port == candidate->addr.sin_port &&
- conn->security_ix == candidate->security_ix &&
- conn->service_id == candidate->service_id &&
- conn->in_clientflag == 0)
- goto found_in_graveyard;
- }
- spin_unlock(&peer->conn_gylock);
-
- /* pick the new candidate */
- _debug("created connection: {%08x} [out]", ntohl(candidate->conn_id));
- atomic_inc(&peer->conn_count);
- conn = candidate;
- candidate = NULL;
-
- make_active:
- list_add_tail(&conn->link, &peer->conn_active);
- write_unlock(&peer->conn_lock);
-
- if (candidate) {
- write_lock(&peer->conn_idlock);
- list_del(&candidate->id_link);
- write_unlock(&peer->conn_idlock);
-
- __RXACCT(atomic_dec(&rxrpc_connection_count));
- kfree(candidate);
- }
- else {
- down_write(&rxrpc_conns_sem);
- list_add_tail(&conn->proc_link, &rxrpc_conns);
- up_write(&rxrpc_conns_sem);
- }
-
- *_conn = conn;
- _leave(" = 0 (%p)", conn);
-
- return 0;
-
- /* handle resurrecting a connection from the graveyard */
- found_in_graveyard:
- _debug("resurrecting connection: {%08x} [out]", ntohl(conn->conn_id));
- rxrpc_get_connection(conn);
- rxrpc_krxtimod_del_timer(&conn->timeout);
- list_del_init(&conn->link);
- spin_unlock(&peer->conn_gylock);
- goto make_active;
-} /* end rxrpc_create_connection() */
-
-/*****************************************************************************/
-/*
- * lookup the connection for an incoming packet
- * - create a new connection record for unrecorded incoming connections
- */
-int rxrpc_connection_lookup(struct rxrpc_peer *peer,
- struct rxrpc_message *msg,
- struct rxrpc_connection **_conn)
-{
- struct rxrpc_connection *conn, *candidate = NULL;
- struct list_head *_p;
- struct sk_buff *pkt = msg->pkt;
- int ret, fresh = 0;
- __be32 x_epoch, x_connid;
- __be16 x_port, x_servid;
- __u32 x_secix;
- u8 x_clflag;
-
- _enter("%p{{%hu}},%u,%hu",
- peer,
- peer->trans->port,
- ntohs(pkt->h.uh->source),
- ntohs(msg->hdr.serviceId));
-
- x_port = pkt->h.uh->source;
- x_epoch = msg->hdr.epoch;
- x_clflag = msg->hdr.flags & RXRPC_CLIENT_INITIATED;
- x_connid = htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK);
- x_servid = msg->hdr.serviceId;
- x_secix = msg->hdr.securityIndex;
-
- /* [common case] search the transport's active list first */
- read_lock(&peer->conn_lock);
- list_for_each(_p, &peer->conn_active) {
- conn = list_entry(_p, struct rxrpc_connection, link);
- if (conn->addr.sin_port == x_port &&
- conn->in_epoch == x_epoch &&
- conn->conn_id == x_connid &&
- conn->security_ix == x_secix &&
- conn->service_id == x_servid &&
- conn->in_clientflag == x_clflag)
- goto found_active;
- }
- read_unlock(&peer->conn_lock);
-
- /* [uncommon case] not active
- * - create a candidate for a new record if an inbound connection
- * - only examine the graveyard for an outbound connection
- */
- if (x_clflag) {
- ret = __rxrpc_create_connection(peer, &candidate);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- /* fill in the specifics */
- candidate->addr.sin_family = AF_INET;
- candidate->addr.sin_port = x_port;
- candidate->addr.sin_addr.s_addr = pkt->nh.iph->saddr;
- candidate->in_epoch = x_epoch;
- candidate->out_epoch = x_epoch;
- candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
- candidate->out_clientflag = 0;
- candidate->conn_id = x_connid;
- candidate->service_id = x_servid;
- candidate->security_ix = x_secix;
- }
-
- /* search the active list again, just in case it appeared whilst we
- * were busy */
- write_lock(&peer->conn_lock);
- list_for_each(_p, &peer->conn_active) {
- conn = list_entry(_p, struct rxrpc_connection, link);
- if (conn->addr.sin_port == x_port &&
- conn->in_epoch == x_epoch &&
- conn->conn_id == x_connid &&
- conn->security_ix == x_secix &&
- conn->service_id == x_servid &&
- conn->in_clientflag == x_clflag)
- goto found_active_second_chance;
- }
-
- /* search the transport's graveyard list */
- spin_lock(&peer->conn_gylock);
- list_for_each(_p, &peer->conn_graveyard) {
- conn = list_entry(_p, struct rxrpc_connection, link);
- if (conn->addr.sin_port == x_port &&
- conn->in_epoch == x_epoch &&
- conn->conn_id == x_connid &&
- conn->security_ix == x_secix &&
- conn->service_id == x_servid &&
- conn->in_clientflag == x_clflag)
- goto found_in_graveyard;
- }
- spin_unlock(&peer->conn_gylock);
-
- /* outbound connections aren't created here */
- if (!x_clflag) {
- write_unlock(&peer->conn_lock);
- _leave(" = -ENOENT");
- return -ENOENT;
- }
-
- /* we can now add the new candidate to the list */
- _debug("created connection: {%08x} [in]", ntohl(candidate->conn_id));
- rxrpc_get_peer(peer);
- conn = candidate;
- candidate = NULL;
- atomic_inc(&peer->conn_count);
- fresh = 1;
-
- make_active:
- list_add_tail(&conn->link, &peer->conn_active);
-
- success_uwfree:
- write_unlock(&peer->conn_lock);
-
- if (candidate) {
- write_lock(&peer->conn_idlock);
- list_del(&candidate->id_link);
- write_unlock(&peer->conn_idlock);
-
- __RXACCT(atomic_dec(&rxrpc_connection_count));
- kfree(candidate);
- }
-
- if (fresh) {
- down_write(&rxrpc_conns_sem);
- list_add_tail(&conn->proc_link, &rxrpc_conns);
- up_write(&rxrpc_conns_sem);
- }
-
- success:
- *_conn = conn;
- _leave(" = 0 (%p)", conn);
- return 0;
-
- /* handle the connection being found in the active list straight off */
- found_active:
- rxrpc_get_connection(conn);
- read_unlock(&peer->conn_lock);
- goto success;
-
- /* handle resurrecting a connection from the graveyard */
- found_in_graveyard:
- _debug("resurrecting connection: {%08x} [in]", ntohl(conn->conn_id));
- rxrpc_get_peer(peer);
- rxrpc_get_connection(conn);
- rxrpc_krxtimod_del_timer(&conn->timeout);
- list_del_init(&conn->link);
- spin_unlock(&peer->conn_gylock);
- goto make_active;
-
- /* handle finding the connection on the second time through the active
- * list */
- found_active_second_chance:
- rxrpc_get_connection(conn);
- goto success_uwfree;
-
-} /* end rxrpc_connection_lookup() */
-
-/*****************************************************************************/
-/*
- * finish using a connection record
- * - it will be transferred to the peer's connection graveyard when refcount
- * reaches 0
- */
-void rxrpc_put_connection(struct rxrpc_connection *conn)
-{
- struct rxrpc_peer *peer;
-
- if (!conn)
- return;
-
- _enter("%p{u=%d p=%hu}",
- conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
-
- peer = conn->peer;
- spin_lock(&peer->conn_gylock);
-
- /* sanity check */
- if (atomic_read(&conn->usage) <= 0)
- BUG();
-
- if (likely(!atomic_dec_and_test(&conn->usage))) {
- spin_unlock(&peer->conn_gylock);
- _leave("");
- return;
- }
-
- /* move to graveyard queue */
- _debug("burying connection: {%08x}", ntohl(conn->conn_id));
- list_move_tail(&conn->link, &peer->conn_graveyard);
-
- rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ);
-
- spin_unlock(&peer->conn_gylock);
-
- rxrpc_put_peer(conn->peer);
-
- _leave(" [killed]");
-} /* end rxrpc_put_connection() */
-
-/*****************************************************************************/
-/*
- * free a connection record
- */
-static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn)
-{
- struct rxrpc_peer *peer;
-
- _enter("%p{u=%d p=%hu}",
- conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
-
- peer = conn->peer;
-
- if (atomic_read(&conn->usage) < 0)
- BUG();
-
- /* remove from graveyard if still dead */
- spin_lock(&peer->conn_gylock);
- if (atomic_read(&conn->usage) == 0) {
- list_del_init(&conn->link);
- }
- else {
- conn = NULL;
- }
- spin_unlock(&peer->conn_gylock);
-
- if (!conn) {
- _leave("");
- return; /* resurrected */
- }
-
- _debug("--- Destroying Connection %p{%08x} ---",
- conn, ntohl(conn->conn_id));
-
- down_write(&rxrpc_conns_sem);
- list_del(&conn->proc_link);
- up_write(&rxrpc_conns_sem);
-
- write_lock(&peer->conn_idlock);
- list_del(&conn->id_link);
- write_unlock(&peer->conn_idlock);
-
- __RXACCT(atomic_dec(&rxrpc_connection_count));
- kfree(conn);
-
- /* if the graveyard is now empty, wake up anyone waiting for that */
- if (atomic_dec_and_test(&peer->conn_count))
- wake_up(&peer->conn_gy_waitq);
-
- _leave(" [destroyed]");
-} /* end rxrpc_conn_do_timeout() */
-
-/*****************************************************************************/
-/*
- * clear all connection records from a peer endpoint
- */
-void rxrpc_conn_clearall(struct rxrpc_peer *peer)
-{
- DECLARE_WAITQUEUE(myself, current);
-
- struct rxrpc_connection *conn;
- int err;
-
- _enter("%p", peer);
-
- /* there shouldn't be any active conns remaining */
- if (!list_empty(&peer->conn_active))
- BUG();
-
- /* manually timeout all conns in the graveyard */
- spin_lock(&peer->conn_gylock);
- while (!list_empty(&peer->conn_graveyard)) {
- conn = list_entry(peer->conn_graveyard.next,
- struct rxrpc_connection, link);
- err = rxrpc_krxtimod_del_timer(&conn->timeout);
- spin_unlock(&peer->conn_gylock);
-
- if (err == 0)
- rxrpc_conn_do_timeout(conn);
-
- spin_lock(&peer->conn_gylock);
- }
- spin_unlock(&peer->conn_gylock);
-
- /* wait for the the conn graveyard to be completely cleared */
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&peer->conn_gy_waitq, &myself);
-
- while (atomic_read(&peer->conn_count) != 0) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-
- remove_wait_queue(&peer->conn_gy_waitq, &myself);
- set_current_state(TASK_RUNNING);
-
- _leave("");
-} /* end rxrpc_conn_clearall() */
-
-/*****************************************************************************/
-/*
- * allocate and prepare a message for sending out through the transport
- * endpoint
- */
-int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
- struct rxrpc_call *call,
- uint8_t type,
- int dcount,
- struct kvec diov[],
- gfp_t alloc_flags,
- struct rxrpc_message **_msg)
-{
- struct rxrpc_message *msg;
- int loop;
-
- _enter("%p{%d},%p,%u", conn, ntohs(conn->addr.sin_port), call, type);
-
- if (dcount > 3) {
- _leave(" = -EINVAL");
- return -EINVAL;
- }
-
- msg = kzalloc(sizeof(struct rxrpc_message), alloc_flags);
- if (!msg) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&msg->usage, 1);
-
- INIT_LIST_HEAD(&msg->link);
-
- msg->state = RXRPC_MSG_PREPARED;
-
- msg->hdr.epoch = conn->out_epoch;
- msg->hdr.cid = conn->conn_id | (call ? call->chan_ix : 0);
- msg->hdr.callNumber = call ? call->call_id : 0;
- msg->hdr.type = type;
- msg->hdr.flags = conn->out_clientflag;
- msg->hdr.securityIndex = conn->security_ix;
- msg->hdr.serviceId = conn->service_id;
-
- /* generate sequence numbers for data packets */
- if (call) {
- switch (type) {
- case RXRPC_PACKET_TYPE_DATA:
- msg->seq = ++call->snd_seq_count;
- msg->hdr.seq = htonl(msg->seq);
- break;
- case RXRPC_PACKET_TYPE_ACK:
- /* ACK sequence numbers are complicated. The following
- * may be wrong:
- * - jumbo packet ACKs should have a seq number
- * - normal ACKs should not
- */
- default:
- break;
- }
- }
-
- msg->dcount = dcount + 1;
- msg->dsize = sizeof(msg->hdr);
- msg->data[0].iov_len = sizeof(msg->hdr);
- msg->data[0].iov_base = &msg->hdr;
-
- for (loop=0; loop < dcount; loop++) {
- msg->dsize += diov[loop].iov_len;
- msg->data[loop+1].iov_len = diov[loop].iov_len;
- msg->data[loop+1].iov_base = diov[loop].iov_base;
- }
-
- __RXACCT(atomic_inc(&rxrpc_message_count));
- *_msg = msg;
- _leave(" = 0 (%p) #%d", msg, atomic_read(&rxrpc_message_count));
- return 0;
-} /* end rxrpc_conn_newmsg() */
-
-/*****************************************************************************/
-/*
- * free a message
- */
-void __rxrpc_put_message(struct rxrpc_message *msg)
-{
- int loop;
-
- _enter("%p #%d", msg, atomic_read(&rxrpc_message_count));
-
- if (msg->pkt)
- kfree_skb(msg->pkt);
- rxrpc_put_connection(msg->conn);
-
- for (loop = 0; loop < 8; loop++)
- if (test_bit(loop, &msg->dfree))
- kfree(msg->data[loop].iov_base);
-
- __RXACCT(atomic_dec(&rxrpc_message_count));
- kfree(msg);
-
- _leave("");
-} /* end __rxrpc_put_message() */
-
-/*****************************************************************************/
-/*
- * send a message out through the transport endpoint
- */
-int rxrpc_conn_sendmsg(struct rxrpc_connection *conn,
- struct rxrpc_message *msg)
-{
- struct msghdr msghdr;
- int ret;
-
- _enter("%p{%d}", conn, ntohs(conn->addr.sin_port));
-
- /* fill in some fields in the header */
- spin_lock(&conn->lock);
- msg->hdr.serial = htonl(++conn->serial_counter);
- msg->rttdone = 0;
- spin_unlock(&conn->lock);
-
- /* set up the message to be transmitted */
- msghdr.msg_name = &conn->addr;
- msghdr.msg_namelen = sizeof(conn->addr);
- msghdr.msg_control = NULL;
- msghdr.msg_controllen = 0;
- msghdr.msg_flags = MSG_CONFIRM | MSG_DONTWAIT;
-
- _net("Sending message type %d of %Zd bytes to %08x:%d",
- msg->hdr.type,
- msg->dsize,
- ntohl(conn->addr.sin_addr.s_addr),
- ntohs(conn->addr.sin_port));
-
- /* send the message */
- ret = kernel_sendmsg(conn->trans->socket, &msghdr,
- msg->data, msg->dcount, msg->dsize);
- if (ret < 0) {
- msg->state = RXRPC_MSG_ERROR;
- } else {
- msg->state = RXRPC_MSG_SENT;
- ret = 0;
-
- spin_lock(&conn->lock);
- do_gettimeofday(&conn->atime);
- msg->stamp = conn->atime;
- spin_unlock(&conn->lock);
- }
-
- _leave(" = %d", ret);
-
- return ret;
-} /* end rxrpc_conn_sendmsg() */
-
-/*****************************************************************************/
-/*
- * deal with a subsequent call packet
- */
-int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
- struct rxrpc_call *call,
- struct rxrpc_message *msg)
-{
- struct rxrpc_message *pmsg;
- struct dst_entry *dst;
- struct list_head *_p;
- unsigned cix, seq;
- int ret = 0;
-
- _enter("%p,%p,%p", conn, call, msg);
-
- if (!call) {
- cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
-
- spin_lock(&conn->lock);
- call = conn->channels[cix];
-
- if (!call || call->call_id != msg->hdr.callNumber) {
- spin_unlock(&conn->lock);
- rxrpc_trans_immediate_abort(conn->trans, msg, -ENOENT);
- goto out;
- }
- else {
- rxrpc_get_call(call);
- spin_unlock(&conn->lock);
- }
- }
- else {
- rxrpc_get_call(call);
- }
-
- _proto("Received packet %%%u [%u] on call %hu:%u:%u",
- ntohl(msg->hdr.serial),
- ntohl(msg->hdr.seq),
- ntohs(msg->hdr.serviceId),
- ntohl(conn->conn_id),
- ntohl(call->call_id));
-
- call->pkt_rcv_count++;
-
- dst = msg->pkt->dst;
- if (dst && dst->dev)
- conn->peer->if_mtu =
- dst->dev->mtu - dst->dev->hard_header_len;
-
- /* queue on the call in seq order */
- rxrpc_get_message(msg);
- seq = msg->seq;
-
- spin_lock(&call->lock);
- list_for_each(_p, &call->rcv_receiveq) {
- pmsg = list_entry(_p, struct rxrpc_message, link);
- if (pmsg->seq > seq)
- break;
- }
- list_add_tail(&msg->link, _p);
-
- /* reset the activity timeout */
- call->flags |= RXRPC_CALL_RCV_PKT;
- mod_timer(&call->rcv_timeout,jiffies + rxrpc_call_rcv_timeout * HZ);
-
- spin_unlock(&call->lock);
-
- rxrpc_krxiod_queue_call(call);
-
- rxrpc_put_call(call);
- out:
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_conn_receive_call_packet() */
-
-/*****************************************************************************/
-/*
- * handle an ICMP error being applied to a connection
- */
-void rxrpc_conn_handle_error(struct rxrpc_connection *conn,
- int local, int errno)
-{
- struct rxrpc_call *calls[4];
- int loop;
-
- _enter("%p{%d},%d", conn, ntohs(conn->addr.sin_port), errno);
-
- /* get a ref to all my calls in one go */
- memset(calls, 0, sizeof(calls));
- spin_lock(&conn->lock);
-
- for (loop = 3; loop >= 0; loop--) {
- if (conn->channels[loop]) {
- calls[loop] = conn->channels[loop];
- rxrpc_get_call(calls[loop]);
- }
- }
-
- spin_unlock(&conn->lock);
-
- /* now kick them all */
- for (loop = 3; loop >= 0; loop--) {
- if (calls[loop]) {
- rxrpc_call_handle_error(calls[loop], local, errno);
- rxrpc_put_call(calls[loop]);
- }
- }
-
- _leave("");
-} /* end rxrpc_conn_handle_error() */
diff --git a/net/rxrpc/internal.h b/net/rxrpc/internal.h
deleted file mode 100644
index cc0c5795a103..000000000000
--- a/net/rxrpc/internal.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* internal.h: internal Rx RPC stuff
- *
- * Copyright (c) 2002 David Howells (dhowells@redhat.com).
- */
-
-#ifndef RXRPC_INTERNAL_H
-#define RXRPC_INTERNAL_H
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-
-/*
- * debug accounting
- */
-#if 1
-#define __RXACCT_DECL(X) X
-#define __RXACCT(X) do { X; } while(0)
-#else
-#define __RXACCT_DECL(X)
-#define __RXACCT(X) do { } while(0)
-#endif
-
-__RXACCT_DECL(extern atomic_t rxrpc_transport_count);
-__RXACCT_DECL(extern atomic_t rxrpc_peer_count);
-__RXACCT_DECL(extern atomic_t rxrpc_connection_count);
-__RXACCT_DECL(extern atomic_t rxrpc_call_count);
-__RXACCT_DECL(extern atomic_t rxrpc_message_count);
-
-/*
- * debug tracing
- */
-#define kenter(FMT, a...) printk("==> %s("FMT")\n",__FUNCTION__ , ##a)
-#define kleave(FMT, a...) printk("<== %s()"FMT"\n",__FUNCTION__ , ##a)
-#define kdebug(FMT, a...) printk(" "FMT"\n" , ##a)
-#define kproto(FMT, a...) printk("### "FMT"\n" , ##a)
-#define knet(FMT, a...) printk(" "FMT"\n" , ##a)
-
-#if 0
-#define _enter(FMT, a...) kenter(FMT , ##a)
-#define _leave(FMT, a...) kleave(FMT , ##a)
-#define _debug(FMT, a...) kdebug(FMT , ##a)
-#define _proto(FMT, a...) kproto(FMT , ##a)
-#define _net(FMT, a...) knet(FMT , ##a)
-#else
-#define _enter(FMT, a...) do { if (rxrpc_ktrace) kenter(FMT , ##a); } while(0)
-#define _leave(FMT, a...) do { if (rxrpc_ktrace) kleave(FMT , ##a); } while(0)
-#define _debug(FMT, a...) do { if (rxrpc_kdebug) kdebug(FMT , ##a); } while(0)
-#define _proto(FMT, a...) do { if (rxrpc_kproto) kproto(FMT , ##a); } while(0)
-#define _net(FMT, a...) do { if (rxrpc_knet) knet (FMT , ##a); } while(0)
-#endif
-
-static inline void rxrpc_discard_my_signals(void)
-{
- while (signal_pending(current)) {
- siginfo_t sinfo;
-
- spin_lock_irq(&current->sighand->siglock);
- dequeue_signal(current, &current->blocked, &sinfo);
- spin_unlock_irq(&current->sighand->siglock);
- }
-}
-
-/*
- * call.c
- */
-extern struct list_head rxrpc_calls;
-extern struct rw_semaphore rxrpc_calls_sem;
-
-/*
- * connection.c
- */
-extern struct list_head rxrpc_conns;
-extern struct rw_semaphore rxrpc_conns_sem;
-extern unsigned long rxrpc_conn_timeout;
-
-extern void rxrpc_conn_clearall(struct rxrpc_peer *peer);
-
-/*
- * peer.c
- */
-extern struct list_head rxrpc_peers;
-extern struct rw_semaphore rxrpc_peers_sem;
-extern unsigned long rxrpc_peer_timeout;
-
-extern void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
- struct rxrpc_message *msg,
- struct rxrpc_message *resp);
-
-extern void rxrpc_peer_clearall(struct rxrpc_transport *trans);
-
-
-/*
- * proc.c
- */
-#ifdef CONFIG_PROC_FS
-extern int rxrpc_proc_init(void);
-extern void rxrpc_proc_cleanup(void);
-#endif
-
-/*
- * transport.c
- */
-extern struct list_head rxrpc_proc_transports;
-extern struct rw_semaphore rxrpc_proc_transports_sem;
-
-#endif /* RXRPC_INTERNAL_H */
diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c
deleted file mode 100644
index bbbcd6c24048..000000000000
--- a/net/rxrpc/krxiod.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/* krxiod.c: Rx I/O daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/freezer.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/call.h>
-#include "internal.h"
-
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxiod_sleepq);
-static DECLARE_COMPLETION(rxrpc_krxiod_dead);
-
-static atomic_t rxrpc_krxiod_qcount = ATOMIC_INIT(0);
-
-static LIST_HEAD(rxrpc_krxiod_transportq);
-static DEFINE_SPINLOCK(rxrpc_krxiod_transportq_lock);
-
-static LIST_HEAD(rxrpc_krxiod_callq);
-static DEFINE_SPINLOCK(rxrpc_krxiod_callq_lock);
-
-static volatile int rxrpc_krxiod_die;
-
-/*****************************************************************************/
-/*
- * Rx I/O daemon
- */
-static int rxrpc_krxiod(void *arg)
-{
- DECLARE_WAITQUEUE(krxiod,current);
-
- printk("Started krxiod %d\n",current->pid);
-
- daemonize("krxiod");
-
- /* loop around waiting for work to do */
- do {
- /* wait for work or to be told to exit */
- _debug("### Begin Wait");
- if (!atomic_read(&rxrpc_krxiod_qcount)) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- add_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
-
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&rxrpc_krxiod_qcount) ||
- rxrpc_krxiod_die ||
- signal_pending(current))
- break;
-
- schedule();
- }
-
- remove_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
- set_current_state(TASK_RUNNING);
- }
- _debug("### End Wait");
-
- /* do work if been given some to do */
- _debug("### Begin Work");
-
- /* see if there's a transport in need of attention */
- if (!list_empty(&rxrpc_krxiod_transportq)) {
- struct rxrpc_transport *trans = NULL;
-
- spin_lock_irq(&rxrpc_krxiod_transportq_lock);
-
- if (!list_empty(&rxrpc_krxiod_transportq)) {
- trans = list_entry(
- rxrpc_krxiod_transportq.next,
- struct rxrpc_transport,
- krxiodq_link);
-
- list_del_init(&trans->krxiodq_link);
- atomic_dec(&rxrpc_krxiod_qcount);
-
- /* make sure it hasn't gone away and doesn't go
- * away */
- if (atomic_read(&trans->usage)>0)
- rxrpc_get_transport(trans);
- else
- trans = NULL;
- }
-
- spin_unlock_irq(&rxrpc_krxiod_transportq_lock);
-
- if (trans) {
- rxrpc_trans_receive_packet(trans);
- rxrpc_put_transport(trans);
- }
- }
-
- /* see if there's a call in need of attention */
- if (!list_empty(&rxrpc_krxiod_callq)) {
- struct rxrpc_call *call = NULL;
-
- spin_lock_irq(&rxrpc_krxiod_callq_lock);
-
- if (!list_empty(&rxrpc_krxiod_callq)) {
- call = list_entry(rxrpc_krxiod_callq.next,
- struct rxrpc_call,
- rcv_krxiodq_lk);
- list_del_init(&call->rcv_krxiodq_lk);
- atomic_dec(&rxrpc_krxiod_qcount);
-
- /* make sure it hasn't gone away and doesn't go
- * away */
- if (atomic_read(&call->usage) > 0) {
- _debug("@@@ KRXIOD"
- " Begin Attend Call %p", call);
- rxrpc_get_call(call);
- }
- else {
- call = NULL;
- }
- }
-
- spin_unlock_irq(&rxrpc_krxiod_callq_lock);
-
- if (call) {
- rxrpc_call_do_stuff(call);
- rxrpc_put_call(call);
- _debug("@@@ KRXIOD End Attend Call %p", call);
- }
- }
-
- _debug("### End Work");
-
- try_to_freeze();
-
- /* discard pending signals */
- rxrpc_discard_my_signals();
-
- } while (!rxrpc_krxiod_die);
-
- /* and that's all */
- complete_and_exit(&rxrpc_krxiod_dead, 0);
-
-} /* end rxrpc_krxiod() */
-
-/*****************************************************************************/
-/*
- * start up a krxiod daemon
- */
-int __init rxrpc_krxiod_init(void)
-{
- return kernel_thread(rxrpc_krxiod, NULL, 0);
-
-} /* end rxrpc_krxiod_init() */
-
-/*****************************************************************************/
-/*
- * kill the krxiod daemon and wait for it to complete
- */
-void rxrpc_krxiod_kill(void)
-{
- rxrpc_krxiod_die = 1;
- wake_up_all(&rxrpc_krxiod_sleepq);
- wait_for_completion(&rxrpc_krxiod_dead);
-
-} /* end rxrpc_krxiod_kill() */
-
-/*****************************************************************************/
-/*
- * queue a transport for attention by krxiod
- */
-void rxrpc_krxiod_queue_transport(struct rxrpc_transport *trans)
-{
- unsigned long flags;
-
- _enter("");
-
- if (list_empty(&trans->krxiodq_link)) {
- spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
-
- if (list_empty(&trans->krxiodq_link)) {
- if (atomic_read(&trans->usage) > 0) {
- list_add_tail(&trans->krxiodq_link,
- &rxrpc_krxiod_transportq);
- atomic_inc(&rxrpc_krxiod_qcount);
- }
- }
-
- spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
- wake_up_all(&rxrpc_krxiod_sleepq);
- }
-
- _leave("");
-
-} /* end rxrpc_krxiod_queue_transport() */
-
-/*****************************************************************************/
-/*
- * dequeue a transport from krxiod's attention queue
- */
-void rxrpc_krxiod_dequeue_transport(struct rxrpc_transport *trans)
-{
- unsigned long flags;
-
- _enter("");
-
- spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
- if (!list_empty(&trans->krxiodq_link)) {
- list_del_init(&trans->krxiodq_link);
- atomic_dec(&rxrpc_krxiod_qcount);
- }
- spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
-
- _leave("");
-
-} /* end rxrpc_krxiod_dequeue_transport() */
-
-/*****************************************************************************/
-/*
- * queue a call for attention by krxiod
- */
-void rxrpc_krxiod_queue_call(struct rxrpc_call *call)
-{
- unsigned long flags;
-
- if (list_empty(&call->rcv_krxiodq_lk)) {
- spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
- if (atomic_read(&call->usage) > 0) {
- list_add_tail(&call->rcv_krxiodq_lk,
- &rxrpc_krxiod_callq);
- atomic_inc(&rxrpc_krxiod_qcount);
- }
- spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
- }
- wake_up_all(&rxrpc_krxiod_sleepq);
-
-} /* end rxrpc_krxiod_queue_call() */
-
-/*****************************************************************************/
-/*
- * dequeue a call from krxiod's attention queue
- */
-void rxrpc_krxiod_dequeue_call(struct rxrpc_call *call)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
- if (!list_empty(&call->rcv_krxiodq_lk)) {
- list_del_init(&call->rcv_krxiodq_lk);
- atomic_dec(&rxrpc_krxiod_qcount);
- }
- spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
-
-} /* end rxrpc_krxiod_dequeue_call() */
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
deleted file mode 100644
index 9a1e7f5e034c..000000000000
--- a/net/rxrpc/krxsecd.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/* krxsecd.c: Rx security daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * This daemon deals with:
- * - consulting the application as to whether inbound peers and calls should be authorised
- * - generating security challenges for inbound connections
- * - responding to security challenges on outbound connections
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <rxrpc/krxsecd.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/message.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/call.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <linux/freezer.h>
-#include <net/sock.h>
-#include "internal.h"
-
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxsecd_sleepq);
-static DECLARE_COMPLETION(rxrpc_krxsecd_dead);
-static volatile int rxrpc_krxsecd_die;
-
-static atomic_t rxrpc_krxsecd_qcount;
-
-/* queue of unprocessed inbound messages with seqno #1 and
- * RXRPC_CLIENT_INITIATED flag set */
-static LIST_HEAD(rxrpc_krxsecd_initmsgq);
-static DEFINE_SPINLOCK(rxrpc_krxsecd_initmsgq_lock);
-
-static void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg);
-
-/*****************************************************************************/
-/*
- * Rx security daemon
- */
-static int rxrpc_krxsecd(void *arg)
-{
- DECLARE_WAITQUEUE(krxsecd, current);
-
- int die;
-
- printk("Started krxsecd %d\n", current->pid);
-
- daemonize("krxsecd");
-
- /* loop around waiting for work to do */
- do {
- /* wait for work or to be told to exit */
- _debug("### Begin Wait");
- if (!atomic_read(&rxrpc_krxsecd_qcount)) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- add_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
-
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&rxrpc_krxsecd_qcount) ||
- rxrpc_krxsecd_die ||
- signal_pending(current))
- break;
-
- schedule();
- }
-
- remove_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
- set_current_state(TASK_RUNNING);
- }
- die = rxrpc_krxsecd_die;
- _debug("### End Wait");
-
- /* see if there're incoming calls in need of authenticating */
- _debug("### Begin Inbound Calls");
-
- if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
- struct rxrpc_message *msg = NULL;
-
- spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
- if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
- msg = list_entry(rxrpc_krxsecd_initmsgq.next,
- struct rxrpc_message, link);
- list_del_init(&msg->link);
- atomic_dec(&rxrpc_krxsecd_qcount);
- }
-
- spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
- if (msg) {
- rxrpc_krxsecd_process_incoming_call(msg);
- rxrpc_put_message(msg);
- }
- }
-
- _debug("### End Inbound Calls");
-
- try_to_freeze();
-
- /* discard pending signals */
- rxrpc_discard_my_signals();
-
- } while (!die);
-
- /* and that's all */
- complete_and_exit(&rxrpc_krxsecd_dead, 0);
-
-} /* end rxrpc_krxsecd() */
-
-/*****************************************************************************/
-/*
- * start up a krxsecd daemon
- */
-int __init rxrpc_krxsecd_init(void)
-{
- return kernel_thread(rxrpc_krxsecd, NULL, 0);
-
-} /* end rxrpc_krxsecd_init() */
-
-/*****************************************************************************/
-/*
- * kill the krxsecd daemon and wait for it to complete
- */
-void rxrpc_krxsecd_kill(void)
-{
- rxrpc_krxsecd_die = 1;
- wake_up_all(&rxrpc_krxsecd_sleepq);
- wait_for_completion(&rxrpc_krxsecd_dead);
-
-} /* end rxrpc_krxsecd_kill() */
-
-/*****************************************************************************/
-/*
- * clear all pending incoming calls for the specified transport
- */
-void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans)
-{
- LIST_HEAD(tmp);
-
- struct rxrpc_message *msg;
- struct list_head *_p, *_n;
-
- _enter("%p",trans);
-
- /* move all the messages for this transport onto a temp list */
- spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
- list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) {
- msg = list_entry(_p, struct rxrpc_message, link);
- if (msg->trans == trans) {
- list_move_tail(&msg->link, &tmp);
- atomic_dec(&rxrpc_krxsecd_qcount);
- }
- }
-
- spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
- /* zap all messages on the temp list */
- while (!list_empty(&tmp)) {
- msg = list_entry(tmp.next, struct rxrpc_message, link);
- list_del_init(&msg->link);
- rxrpc_put_message(msg);
- }
-
- _leave("");
-} /* end rxrpc_krxsecd_clear_transport() */
-
-/*****************************************************************************/
-/*
- * queue a message on the incoming calls list
- */
-void rxrpc_krxsecd_queue_incoming_call(struct rxrpc_message *msg)
-{
- _enter("%p", msg);
-
- /* queue for processing by krxsecd */
- spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
- if (!rxrpc_krxsecd_die) {
- rxrpc_get_message(msg);
- list_add_tail(&msg->link, &rxrpc_krxsecd_initmsgq);
- atomic_inc(&rxrpc_krxsecd_qcount);
- }
-
- spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
- wake_up(&rxrpc_krxsecd_sleepq);
-
- _leave("");
-} /* end rxrpc_krxsecd_queue_incoming_call() */
-
-/*****************************************************************************/
-/*
- * process the initial message of an incoming call
- */
-void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg)
-{
- struct rxrpc_transport *trans = msg->trans;
- struct rxrpc_service *srv;
- struct rxrpc_call *call;
- struct list_head *_p;
- unsigned short sid;
- int ret;
-
- _enter("%p{tr=%p}", msg, trans);
-
- ret = rxrpc_incoming_call(msg->conn, msg, &call);
- if (ret < 0)
- goto out;
-
- /* find the matching service on the transport */
- sid = ntohs(msg->hdr.serviceId);
- srv = NULL;
-
- spin_lock(&trans->lock);
- list_for_each(_p, &trans->services) {
- srv = list_entry(_p, struct rxrpc_service, link);
- if (srv->service_id == sid && try_module_get(srv->owner)) {
- /* found a match (made sure it won't vanish) */
- _debug("found service '%s'", srv->name);
- call->owner = srv->owner;
- break;
- }
- }
- spin_unlock(&trans->lock);
-
- /* report the new connection
- * - the func must inc the call's usage count to keep it
- */
- ret = -ENOENT;
- if (_p != &trans->services) {
- /* attempt to accept the call */
- call->conn->service = srv;
- call->app_attn_func = srv->attn_func;
- call->app_error_func = srv->error_func;
- call->app_aemap_func = srv->aemap_func;
-
- ret = srv->new_call(call);
-
- /* send an abort if an error occurred */
- if (ret < 0) {
- rxrpc_call_abort(call, ret);
- }
- else {
- /* formally receive and ACK the new packet */
- ret = rxrpc_conn_receive_call_packet(call->conn,
- call, msg);
- }
- }
-
- rxrpc_put_call(call);
- out:
- if (ret < 0)
- rxrpc_trans_immediate_abort(trans, msg, ret);
-
- _leave(" (%d)", ret);
-} /* end rxrpc_krxsecd_process_incoming_call() */
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
deleted file mode 100644
index 9a9b6132dba4..000000000000
--- a/net/rxrpc/krxtimod.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/* krxtimod.c: RXRPC timeout daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/freezer.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/krxtimod.h>
-#include <asm/errno.h>
-#include "internal.h"
-
-static DECLARE_COMPLETION(krxtimod_alive);
-static DECLARE_COMPLETION(krxtimod_dead);
-static DECLARE_WAIT_QUEUE_HEAD(krxtimod_sleepq);
-static int krxtimod_die;
-
-static LIST_HEAD(krxtimod_list);
-static DEFINE_SPINLOCK(krxtimod_lock);
-
-static int krxtimod(void *arg);
-
-/*****************************************************************************/
-/*
- * start the timeout daemon
- */
-int rxrpc_krxtimod_start(void)
-{
- int ret;
-
- ret = kernel_thread(krxtimod, NULL, 0);
- if (ret < 0)
- return ret;
-
- wait_for_completion(&krxtimod_alive);
-
- return ret;
-} /* end rxrpc_krxtimod_start() */
-
-/*****************************************************************************/
-/*
- * stop the timeout daemon
- */
-void rxrpc_krxtimod_kill(void)
-{
- /* get rid of my daemon */
- krxtimod_die = 1;
- wake_up(&krxtimod_sleepq);
- wait_for_completion(&krxtimod_dead);
-
-} /* end rxrpc_krxtimod_kill() */
-
-/*****************************************************************************/
-/*
- * timeout processing daemon
- */
-static int krxtimod(void *arg)
-{
- DECLARE_WAITQUEUE(myself, current);
-
- rxrpc_timer_t *timer;
-
- printk("Started krxtimod %d\n", current->pid);
-
- daemonize("krxtimod");
-
- complete(&krxtimod_alive);
-
- /* loop around looking for things to attend to */
- loop:
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&krxtimod_sleepq, &myself);
-
- for (;;) {
- unsigned long jif;
- long timeout;
-
- /* deal with the server being asked to die */
- if (krxtimod_die) {
- remove_wait_queue(&krxtimod_sleepq, &myself);
- _leave("");
- complete_and_exit(&krxtimod_dead, 0);
- }
-
- try_to_freeze();
-
- /* discard pending signals */
- rxrpc_discard_my_signals();
-
- /* work out the time to elapse before the next event */
- spin_lock(&krxtimod_lock);
- if (list_empty(&krxtimod_list)) {
- timeout = MAX_SCHEDULE_TIMEOUT;
- }
- else {
- timer = list_entry(krxtimod_list.next,
- rxrpc_timer_t, link);
- timeout = timer->timo_jif;
- jif = jiffies;
-
- if (time_before_eq((unsigned long) timeout, jif))
- goto immediate;
-
- else {
- timeout = (long) timeout - (long) jiffies;
- }
- }
- spin_unlock(&krxtimod_lock);
-
- schedule_timeout(timeout);
-
- set_current_state(TASK_INTERRUPTIBLE);
- }
-
- /* the thing on the front of the queue needs processing
- * - we come here with the lock held and timer pointing to the expired
- * entry
- */
- immediate:
- remove_wait_queue(&krxtimod_sleepq, &myself);
- set_current_state(TASK_RUNNING);
-
- _debug("@@@ Begin Timeout of %p", timer);
-
- /* dequeue the timer */
- list_del_init(&timer->link);
- spin_unlock(&krxtimod_lock);
-
- /* call the timeout function */
- timer->ops->timed_out(timer);
-
- _debug("@@@ End Timeout");
- goto loop;
-
-} /* end krxtimod() */
-
-/*****************************************************************************/
-/*
- * (re-)queue a timer
- */
-void rxrpc_krxtimod_add_timer(rxrpc_timer_t *timer, unsigned long timeout)
-{
- struct list_head *_p;
- rxrpc_timer_t *ptimer;
-
- _enter("%p,%lu", timer, timeout);
-
- spin_lock(&krxtimod_lock);
-
- list_del(&timer->link);
-
- /* the timer was deferred or reset - put it back in the queue at the
- * right place */
- timer->timo_jif = jiffies + timeout;
-
- list_for_each(_p, &krxtimod_list) {
- ptimer = list_entry(_p, rxrpc_timer_t, link);
- if (time_before(timer->timo_jif, ptimer->timo_jif))
- break;
- }
-
- list_add_tail(&timer->link, _p); /* insert before stopping point */
-
- spin_unlock(&krxtimod_lock);
-
- wake_up(&krxtimod_sleepq);
-
- _leave("");
-} /* end rxrpc_krxtimod_add_timer() */
-
-/*****************************************************************************/
-/*
- * dequeue a timer
- * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
- */
-int rxrpc_krxtimod_del_timer(rxrpc_timer_t *timer)
-{
- int ret = 0;
-
- _enter("%p", timer);
-
- spin_lock(&krxtimod_lock);
-
- if (list_empty(&timer->link))
- ret = -ENOENT;
- else
- list_del_init(&timer->link);
-
- spin_unlock(&krxtimod_lock);
-
- wake_up(&krxtimod_sleepq);
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_krxtimod_del_timer() */
diff --git a/net/rxrpc/main.c b/net/rxrpc/main.c
deleted file mode 100644
index baec1f7fd8b9..000000000000
--- a/net/rxrpc/main.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/* main.c: Rx RPC interface
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/krxsecd.h>
-#include <rxrpc/krxtimod.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-MODULE_DESCRIPTION("Rx RPC implementation");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
-__be32 rxrpc_epoch;
-
-/*****************************************************************************/
-/*
- * initialise the Rx module
- */
-static int __init rxrpc_initialise(void)
-{
- int ret;
-
- /* my epoch value */
- rxrpc_epoch = htonl(xtime.tv_sec);
-
- /* register the /proc interface */
-#ifdef CONFIG_PROC_FS
- ret = rxrpc_proc_init();
- if (ret<0)
- return ret;
-#endif
-
- /* register the sysctl files */
-#ifdef CONFIG_SYSCTL
- ret = rxrpc_sysctl_init();
- if (ret<0)
- goto error_proc;
-#endif
-
- /* start the krxtimod daemon */
- ret = rxrpc_krxtimod_start();
- if (ret<0)
- goto error_sysctl;
-
- /* start the krxiod daemon */
- ret = rxrpc_krxiod_init();
- if (ret<0)
- goto error_krxtimod;
-
- /* start the krxsecd daemon */
- ret = rxrpc_krxsecd_init();
- if (ret<0)
- goto error_krxiod;
-
- kdebug("\n\n");
-
- return 0;
-
- error_krxiod:
- rxrpc_krxiod_kill();
- error_krxtimod:
- rxrpc_krxtimod_kill();
- error_sysctl:
-#ifdef CONFIG_SYSCTL
- rxrpc_sysctl_cleanup();
- error_proc:
-#endif
-#ifdef CONFIG_PROC_FS
- rxrpc_proc_cleanup();
-#endif
- return ret;
-} /* end rxrpc_initialise() */
-
-module_init(rxrpc_initialise);
-
-/*****************************************************************************/
-/*
- * clean up the Rx module
- */
-static void __exit rxrpc_cleanup(void)
-{
- kenter("");
-
- __RXACCT(printk("Outstanding Messages : %d\n",
- atomic_read(&rxrpc_message_count)));
- __RXACCT(printk("Outstanding Calls : %d\n",
- atomic_read(&rxrpc_call_count)));
- __RXACCT(printk("Outstanding Connections: %d\n",
- atomic_read(&rxrpc_connection_count)));
- __RXACCT(printk("Outstanding Peers : %d\n",
- atomic_read(&rxrpc_peer_count)));
- __RXACCT(printk("Outstanding Transports : %d\n",
- atomic_read(&rxrpc_transport_count)));
-
- rxrpc_krxsecd_kill();
- rxrpc_krxiod_kill();
- rxrpc_krxtimod_kill();
-#ifdef CONFIG_SYSCTL
- rxrpc_sysctl_cleanup();
-#endif
-#ifdef CONFIG_PROC_FS
- rxrpc_proc_cleanup();
-#endif
-
- __RXACCT(printk("Outstanding Messages : %d\n",
- atomic_read(&rxrpc_message_count)));
- __RXACCT(printk("Outstanding Calls : %d\n",
- atomic_read(&rxrpc_call_count)));
- __RXACCT(printk("Outstanding Connections: %d\n",
- atomic_read(&rxrpc_connection_count)));
- __RXACCT(printk("Outstanding Peers : %d\n",
- atomic_read(&rxrpc_peer_count)));
- __RXACCT(printk("Outstanding Transports : %d\n",
- atomic_read(&rxrpc_transport_count)));
-
- kleave("");
-} /* end rxrpc_cleanup() */
-
-module_exit(rxrpc_cleanup);
-
-/*****************************************************************************/
-/*
- * clear the dead space between task_struct and kernel stack
- * - called by supplying -finstrument-functions to gcc
- */
-#if 0
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-{
- asm volatile(" movl %%esp,%%edi \n"
- " andl %0,%%edi \n"
- " addl %1,%%edi \n"
- " movl %%esp,%%ecx \n"
- " subl %%edi,%%ecx \n"
- " shrl $2,%%ecx \n"
- " movl $0xedededed,%%eax \n"
- " rep stosl \n"
- :
- : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
- : "eax", "ecx", "edi", "memory", "cc"
- );
-}
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-{
- asm volatile(" movl %%esp,%%edi \n"
- " andl %0,%%edi \n"
- " addl %1,%%edi \n"
- " movl %%esp,%%ecx \n"
- " subl %%edi,%%ecx \n"
- " shrl $2,%%ecx \n"
- " movl $0xdadadada,%%eax \n"
- " rep stosl \n"
- :
- : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
- : "eax", "ecx", "edi", "memory", "cc"
- );
-}
-#endif
diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c
deleted file mode 100644
index 8a275157a3bb..000000000000
--- a/net/rxrpc/peer.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/* peer.c: Rx RPC peer management
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <net/sock.h>
-#include <asm/uaccess.h>
-#include <asm/div64.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_peer_count);
-LIST_HEAD(rxrpc_peers);
-DECLARE_RWSEM(rxrpc_peers_sem);
-unsigned long rxrpc_peer_timeout = 12 * 60 * 60;
-
-static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer);
-
-static void __rxrpc_peer_timeout(rxrpc_timer_t *timer)
-{
- struct rxrpc_peer *peer =
- list_entry(timer, struct rxrpc_peer, timeout);
-
- _debug("Rx PEER TIMEOUT [%p{u=%d}]", peer, atomic_read(&peer->usage));
-
- rxrpc_peer_do_timeout(peer);
-}
-
-static const struct rxrpc_timer_ops rxrpc_peer_timer_ops = {
- .timed_out = __rxrpc_peer_timeout,
-};
-
-/*****************************************************************************/
-/*
- * create a peer record
- */
-static int __rxrpc_create_peer(struct rxrpc_transport *trans, __be32 addr,
- struct rxrpc_peer **_peer)
-{
- struct rxrpc_peer *peer;
-
- _enter("%p,%08x", trans, ntohl(addr));
-
- /* allocate and initialise a peer record */
- peer = kzalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
- if (!peer) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&peer->usage, 1);
-
- INIT_LIST_HEAD(&peer->link);
- INIT_LIST_HEAD(&peer->proc_link);
- INIT_LIST_HEAD(&peer->conn_idlist);
- INIT_LIST_HEAD(&peer->conn_active);
- INIT_LIST_HEAD(&peer->conn_graveyard);
- spin_lock_init(&peer->conn_gylock);
- init_waitqueue_head(&peer->conn_gy_waitq);
- rwlock_init(&peer->conn_idlock);
- rwlock_init(&peer->conn_lock);
- atomic_set(&peer->conn_count, 0);
- spin_lock_init(&peer->lock);
- rxrpc_timer_init(&peer->timeout, &rxrpc_peer_timer_ops);
-
- peer->addr.s_addr = addr;
-
- peer->trans = trans;
- peer->ops = trans->peer_ops;
-
- __RXACCT(atomic_inc(&rxrpc_peer_count));
- *_peer = peer;
- _leave(" = 0 (%p)", peer);
-
- return 0;
-} /* end __rxrpc_create_peer() */
-
-/*****************************************************************************/
-/*
- * find a peer record on the specified transport
- * - returns (if successful) with peer record usage incremented
- * - resurrects it from the graveyard if found there
- */
-int rxrpc_peer_lookup(struct rxrpc_transport *trans, __be32 addr,
- struct rxrpc_peer **_peer)
-{
- struct rxrpc_peer *peer, *candidate = NULL;
- struct list_head *_p;
- int ret;
-
- _enter("%p{%hu},%08x", trans, trans->port, ntohl(addr));
-
- /* [common case] search the transport's active list first */
- read_lock(&trans->peer_lock);
- list_for_each(_p, &trans->peer_active) {
- peer = list_entry(_p, struct rxrpc_peer, link);
- if (peer->addr.s_addr == addr)
- goto found_active;
- }
- read_unlock(&trans->peer_lock);
-
- /* [uncommon case] not active - create a candidate for a new record */
- ret = __rxrpc_create_peer(trans, addr, &candidate);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- /* search the active list again, just in case it appeared whilst we
- * were busy */
- write_lock(&trans->peer_lock);
- list_for_each(_p, &trans->peer_active) {
- peer = list_entry(_p, struct rxrpc_peer, link);
- if (peer->addr.s_addr == addr)
- goto found_active_second_chance;
- }
-
- /* search the transport's graveyard list */
- spin_lock(&trans->peer_gylock);
- list_for_each(_p, &trans->peer_graveyard) {
- peer = list_entry(_p, struct rxrpc_peer, link);
- if (peer->addr.s_addr == addr)
- goto found_in_graveyard;
- }
- spin_unlock(&trans->peer_gylock);
-
- /* we can now add the new candidate to the list
- * - tell the application layer that this peer has been added
- */
- rxrpc_get_transport(trans);
- peer = candidate;
- candidate = NULL;
-
- if (peer->ops && peer->ops->adding) {
- ret = peer->ops->adding(peer);
- if (ret < 0) {
- write_unlock(&trans->peer_lock);
- __RXACCT(atomic_dec(&rxrpc_peer_count));
- kfree(peer);
- rxrpc_put_transport(trans);
- _leave(" = %d", ret);
- return ret;
- }
- }
-
- atomic_inc(&trans->peer_count);
-
- make_active:
- list_add_tail(&peer->link, &trans->peer_active);
-
- success_uwfree:
- write_unlock(&trans->peer_lock);
-
- if (candidate) {
- __RXACCT(atomic_dec(&rxrpc_peer_count));
- kfree(candidate);
- }
-
- if (list_empty(&peer->proc_link)) {
- down_write(&rxrpc_peers_sem);
- list_add_tail(&peer->proc_link, &rxrpc_peers);
- up_write(&rxrpc_peers_sem);
- }
-
- success:
- *_peer = peer;
-
- _leave(" = 0 (%p{u=%d cc=%d})",
- peer,
- atomic_read(&peer->usage),
- atomic_read(&peer->conn_count));
- return 0;
-
- /* handle the peer being found in the active list straight off */
- found_active:
- rxrpc_get_peer(peer);
- read_unlock(&trans->peer_lock);
- goto success;
-
- /* handle resurrecting a peer from the graveyard */
- found_in_graveyard:
- rxrpc_get_peer(peer);
- rxrpc_get_transport(peer->trans);
- rxrpc_krxtimod_del_timer(&peer->timeout);
- list_del_init(&peer->link);
- spin_unlock(&trans->peer_gylock);
- goto make_active;
-
- /* handle finding the peer on the second time through the active
- * list */
- found_active_second_chance:
- rxrpc_get_peer(peer);
- goto success_uwfree;
-
-} /* end rxrpc_peer_lookup() */
-
-/*****************************************************************************/
-/*
- * finish with a peer record
- * - it gets sent to the graveyard from where it can be resurrected or timed
- * out
- */
-void rxrpc_put_peer(struct rxrpc_peer *peer)
-{
- struct rxrpc_transport *trans = peer->trans;
-
- _enter("%p{cc=%d a=%08x}",
- peer,
- atomic_read(&peer->conn_count),
- ntohl(peer->addr.s_addr));
-
- /* sanity check */
- if (atomic_read(&peer->usage) <= 0)
- BUG();
-
- write_lock(&trans->peer_lock);
- spin_lock(&trans->peer_gylock);
- if (likely(!atomic_dec_and_test(&peer->usage))) {
- spin_unlock(&trans->peer_gylock);
- write_unlock(&trans->peer_lock);
- _leave("");
- return;
- }
-
- /* move to graveyard queue */
- list_del(&peer->link);
- write_unlock(&trans->peer_lock);
-
- list_add_tail(&peer->link, &trans->peer_graveyard);
-
- BUG_ON(!list_empty(&peer->conn_active));
-
- rxrpc_krxtimod_add_timer(&peer->timeout, rxrpc_peer_timeout * HZ);
-
- spin_unlock(&trans->peer_gylock);
-
- rxrpc_put_transport(trans);
-
- _leave(" [killed]");
-} /* end rxrpc_put_peer() */
-
-/*****************************************************************************/
-/*
- * handle a peer timing out in the graveyard
- * - called from krxtimod
- */
-static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer)
-{
- struct rxrpc_transport *trans = peer->trans;
-
- _enter("%p{u=%d cc=%d a=%08x}",
- peer,
- atomic_read(&peer->usage),
- atomic_read(&peer->conn_count),
- ntohl(peer->addr.s_addr));
-
- BUG_ON(atomic_read(&peer->usage) < 0);
-
- /* remove from graveyard if still dead */
- spin_lock(&trans->peer_gylock);
- if (atomic_read(&peer->usage) == 0)
- list_del_init(&peer->link);
- else
- peer = NULL;
- spin_unlock(&trans->peer_gylock);
-
- if (!peer) {
- _leave("");
- return; /* resurrected */
- }
-
- /* clear all connections on this peer */
- rxrpc_conn_clearall(peer);
-
- BUG_ON(!list_empty(&peer->conn_active));
- BUG_ON(!list_empty(&peer->conn_graveyard));
-
- /* inform the application layer */
- if (peer->ops && peer->ops->discarding)
- peer->ops->discarding(peer);
-
- if (!list_empty(&peer->proc_link)) {
- down_write(&rxrpc_peers_sem);
- list_del(&peer->proc_link);
- up_write(&rxrpc_peers_sem);
- }
-
- __RXACCT(atomic_dec(&rxrpc_peer_count));
- kfree(peer);
-
- /* if the graveyard is now empty, wake up anyone waiting for that */
- if (atomic_dec_and_test(&trans->peer_count))
- wake_up(&trans->peer_gy_waitq);
-
- _leave(" [destroyed]");
-} /* end rxrpc_peer_do_timeout() */
-
-/*****************************************************************************/
-/*
- * clear all peer records from a transport endpoint
- */
-void rxrpc_peer_clearall(struct rxrpc_transport *trans)
-{
- DECLARE_WAITQUEUE(myself,current);
-
- struct rxrpc_peer *peer;
- int err;
-
- _enter("%p",trans);
-
- /* there shouldn't be any active peers remaining */
- BUG_ON(!list_empty(&trans->peer_active));
-
- /* manually timeout all peers in the graveyard */
- spin_lock(&trans->peer_gylock);
- while (!list_empty(&trans->peer_graveyard)) {
- peer = list_entry(trans->peer_graveyard.next,
- struct rxrpc_peer, link);
- _debug("Clearing peer %p\n", peer);
- err = rxrpc_krxtimod_del_timer(&peer->timeout);
- spin_unlock(&trans->peer_gylock);
-
- if (err == 0)
- rxrpc_peer_do_timeout(peer);
-
- spin_lock(&trans->peer_gylock);
- }
- spin_unlock(&trans->peer_gylock);
-
- /* wait for the the peer graveyard to be completely cleared */
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&trans->peer_gy_waitq, &myself);
-
- while (atomic_read(&trans->peer_count) != 0) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-
- remove_wait_queue(&trans->peer_gy_waitq, &myself);
- set_current_state(TASK_RUNNING);
-
- _leave("");
-} /* end rxrpc_peer_clearall() */
-
-/*****************************************************************************/
-/*
- * calculate and cache the Round-Trip-Time for a message and its response
- */
-void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
- struct rxrpc_message *msg,
- struct rxrpc_message *resp)
-{
- unsigned long long rtt;
- int loop;
-
- _enter("%p,%p,%p", peer, msg, resp);
-
- /* calculate the latest RTT */
- rtt = resp->stamp.tv_sec - msg->stamp.tv_sec;
- rtt *= 1000000UL;
- rtt += resp->stamp.tv_usec - msg->stamp.tv_usec;
-
- /* add to cache */
- peer->rtt_cache[peer->rtt_point] = rtt;
- peer->rtt_point++;
- peer->rtt_point %= RXRPC_RTT_CACHE_SIZE;
-
- if (peer->rtt_usage < RXRPC_RTT_CACHE_SIZE)
- peer->rtt_usage++;
-
- /* recalculate RTT */
- rtt = 0;
- for (loop = peer->rtt_usage - 1; loop >= 0; loop--)
- rtt += peer->rtt_cache[loop];
-
- do_div(rtt, peer->rtt_usage);
- peer->rtt = rtt;
-
- _leave(" RTT=%lu.%lums",
- (long) (peer->rtt / 1000), (long) (peer->rtt % 1000));
-
-} /* end rxrpc_peer_calculate_rtt() */
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
deleted file mode 100644
index 8551c879e456..000000000000
--- a/net/rxrpc/proc.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/* proc.c: /proc interface for RxRPC
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-static struct proc_dir_entry *proc_rxrpc;
-
-static int rxrpc_proc_transports_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_transports_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_transports_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_transports_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_transports_ops = {
- .start = rxrpc_proc_transports_start,
- .next = rxrpc_proc_transports_next,
- .stop = rxrpc_proc_transports_stop,
- .show = rxrpc_proc_transports_show,
-};
-
-static const struct file_operations rxrpc_proc_transports_fops = {
- .open = rxrpc_proc_transports_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int rxrpc_proc_peers_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_peers_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_peers_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_peers_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_peers_ops = {
- .start = rxrpc_proc_peers_start,
- .next = rxrpc_proc_peers_next,
- .stop = rxrpc_proc_peers_stop,
- .show = rxrpc_proc_peers_show,
-};
-
-static const struct file_operations rxrpc_proc_peers_fops = {
- .open = rxrpc_proc_peers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int rxrpc_proc_conns_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_conns_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_conns_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_conns_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_conns_ops = {
- .start = rxrpc_proc_conns_start,
- .next = rxrpc_proc_conns_next,
- .stop = rxrpc_proc_conns_stop,
- .show = rxrpc_proc_conns_show,
-};
-
-static const struct file_operations rxrpc_proc_conns_fops = {
- .open = rxrpc_proc_conns_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int rxrpc_proc_calls_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_calls_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_calls_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_calls_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_calls_ops = {
- .start = rxrpc_proc_calls_start,
- .next = rxrpc_proc_calls_next,
- .stop = rxrpc_proc_calls_stop,
- .show = rxrpc_proc_calls_show,
-};
-
-static const struct file_operations rxrpc_proc_calls_fops = {
- .open = rxrpc_proc_calls_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const char *rxrpc_call_states7[] = {
- "complet",
- "error ",
- "rcv_op ",
- "rcv_arg",
- "got_arg",
- "snd_rpl",
- "fin_ack",
- "snd_arg",
- "rcv_rpl",
- "got_rpl"
-};
-
-static const char *rxrpc_call_error_states7[] = {
- "no_err ",
- "loc_abt",
- "rmt_abt",
- "loc_err",
- "rmt_err"
-};
-
-/*****************************************************************************/
-/*
- * initialise the /proc/net/rxrpc/ directory
- */
-int rxrpc_proc_init(void)
-{
- struct proc_dir_entry *p;
-
- proc_rxrpc = proc_mkdir("rxrpc", proc_net);
- if (!proc_rxrpc)
- goto error;
- proc_rxrpc->owner = THIS_MODULE;
-
- p = create_proc_entry("calls", 0, proc_rxrpc);
- if (!p)
- goto error_proc;
- p->proc_fops = &rxrpc_proc_calls_fops;
- p->owner = THIS_MODULE;
-
- p = create_proc_entry("connections", 0, proc_rxrpc);
- if (!p)
- goto error_calls;
- p->proc_fops = &rxrpc_proc_conns_fops;
- p->owner = THIS_MODULE;
-
- p = create_proc_entry("peers", 0, proc_rxrpc);
- if (!p)
- goto error_calls;
- p->proc_fops = &rxrpc_proc_peers_fops;
- p->owner = THIS_MODULE;
-
- p = create_proc_entry("transports", 0, proc_rxrpc);
- if (!p)
- goto error_conns;
- p->proc_fops = &rxrpc_proc_transports_fops;
- p->owner = THIS_MODULE;
-
- return 0;
-
- error_conns:
- remove_proc_entry("connections", proc_rxrpc);
- error_calls:
- remove_proc_entry("calls", proc_rxrpc);
- error_proc:
- remove_proc_entry("rxrpc", proc_net);
- error:
- return -ENOMEM;
-} /* end rxrpc_proc_init() */
-
-/*****************************************************************************/
-/*
- * clean up the /proc/net/rxrpc/ directory
- */
-void rxrpc_proc_cleanup(void)
-{
- remove_proc_entry("transports", proc_rxrpc);
- remove_proc_entry("peers", proc_rxrpc);
- remove_proc_entry("connections", proc_rxrpc);
- remove_proc_entry("calls", proc_rxrpc);
-
- remove_proc_entry("rxrpc", proc_net);
-
-} /* end rxrpc_proc_cleanup() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/transports" which provides a summary of extant transports
- */
-static int rxrpc_proc_transports_open(struct inode *inode, struct file *file)
-{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &rxrpc_proc_transports_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE(inode)->data;
-
- return 0;
-} /* end rxrpc_proc_transports_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the transports list and return the first item
- */
-static void *rxrpc_proc_transports_start(struct seq_file *m, loff_t *_pos)
-{
- struct list_head *_p;
- loff_t pos = *_pos;
-
- /* lock the list against modification */
- down_read(&rxrpc_proc_transports_sem);
-
- /* allow for the header line */
- if (!pos)
- return SEQ_START_TOKEN;
- pos--;
-
- /* find the n'th element in the list */
- list_for_each(_p, &rxrpc_proc_transports)
- if (!pos--)
- break;
-
- return _p != &rxrpc_proc_transports ? _p : NULL;
-} /* end rxrpc_proc_transports_start() */
-
-/*****************************************************************************/
-/*
- * move to next call in transports list
- */
-static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct list_head *_p;
-
- (*pos)++;
-
- _p = v;
- _p = (v == SEQ_START_TOKEN) ? rxrpc_proc_transports.next : _p->next;
-
- return _p != &rxrpc_proc_transports ? _p : NULL;
-} /* end rxrpc_proc_transports_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the transports list
- */
-static void rxrpc_proc_transports_stop(struct seq_file *p, void *v)
-{
- up_read(&rxrpc_proc_transports_sem);
-
-} /* end rxrpc_proc_transports_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of call lines
- */
-static int rxrpc_proc_transports_show(struct seq_file *m, void *v)
-{
- struct rxrpc_transport *trans =
- list_entry(v, struct rxrpc_transport, proc_link);
-
- /* display header on line 1 */
- if (v == SEQ_START_TOKEN) {
- seq_puts(m, "LOCAL USE\n");
- return 0;
- }
-
- /* display one transport per line on subsequent lines */
- seq_printf(m, "%5hu %3d\n",
- trans->port,
- atomic_read(&trans->usage)
- );
-
- return 0;
-} /* end rxrpc_proc_transports_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/peers" which provides a summary of extant peers
- */
-static int rxrpc_proc_peers_open(struct inode *inode, struct file *file)
-{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &rxrpc_proc_peers_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE(inode)->data;
-
- return 0;
-} /* end rxrpc_proc_peers_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the peers list and return the
- * first item
- */
-static void *rxrpc_proc_peers_start(struct seq_file *m, loff_t *_pos)
-{
- struct list_head *_p;
- loff_t pos = *_pos;
-
- /* lock the list against modification */
- down_read(&rxrpc_peers_sem);
-
- /* allow for the header line */
- if (!pos)
- return SEQ_START_TOKEN;
- pos--;
-
- /* find the n'th element in the list */
- list_for_each(_p, &rxrpc_peers)
- if (!pos--)
- break;
-
- return _p != &rxrpc_peers ? _p : NULL;
-} /* end rxrpc_proc_peers_start() */
-
-/*****************************************************************************/
-/*
- * move to next conn in peers list
- */
-static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct list_head *_p;
-
- (*pos)++;
-
- _p = v;
- _p = (v == SEQ_START_TOKEN) ? rxrpc_peers.next : _p->next;
-
- return _p != &rxrpc_peers ? _p : NULL;
-} /* end rxrpc_proc_peers_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the peers list
- */
-static void rxrpc_proc_peers_stop(struct seq_file *p, void *v)
-{
- up_read(&rxrpc_peers_sem);
-
-} /* end rxrpc_proc_peers_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of conn lines
- */
-static int rxrpc_proc_peers_show(struct seq_file *m, void *v)
-{
- struct rxrpc_peer *peer = list_entry(v, struct rxrpc_peer, proc_link);
- long timeout;
-
- /* display header on line 1 */
- if (v == SEQ_START_TOKEN) {
- seq_puts(m, "LOCAL REMOTE USAGE CONNS TIMEOUT"
- " MTU RTT(uS)\n");
- return 0;
- }
-
- /* display one peer per line on subsequent lines */
- timeout = 0;
- if (!list_empty(&peer->timeout.link))
- timeout = (long) peer->timeout.timo_jif -
- (long) jiffies;
-
- seq_printf(m, "%5hu %08x %5d %5d %8ld %5Zu %7lu\n",
- peer->trans->port,
- ntohl(peer->addr.s_addr),
- atomic_read(&peer->usage),
- atomic_read(&peer->conn_count),
- timeout,
- peer->if_mtu,
- (long) peer->rtt
- );
-
- return 0;
-} /* end rxrpc_proc_peers_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/connections" which provides a summary of extant
- * connections
- */
-static int rxrpc_proc_conns_open(struct inode *inode, struct file *file)
-{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &rxrpc_proc_conns_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE(inode)->data;
-
- return 0;
-} /* end rxrpc_proc_conns_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the conns list and return the
- * first item
- */
-static void *rxrpc_proc_conns_start(struct seq_file *m, loff_t *_pos)
-{
- struct list_head *_p;
- loff_t pos = *_pos;
-
- /* lock the list against modification */
- down_read(&rxrpc_conns_sem);
-
- /* allow for the header line */
- if (!pos)
- return SEQ_START_TOKEN;
- pos--;
-
- /* find the n'th element in the list */
- list_for_each(_p, &rxrpc_conns)
- if (!pos--)
- break;
-
- return _p != &rxrpc_conns ? _p : NULL;
-} /* end rxrpc_proc_conns_start() */
-
-/*****************************************************************************/
-/*
- * move to next conn in conns list
- */
-static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct list_head *_p;
-
- (*pos)++;
-
- _p = v;
- _p = (v == SEQ_START_TOKEN) ? rxrpc_conns.next : _p->next;
-
- return _p != &rxrpc_conns ? _p : NULL;
-} /* end rxrpc_proc_conns_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the conns list
- */
-static void rxrpc_proc_conns_stop(struct seq_file *p, void *v)
-{
- up_read(&rxrpc_conns_sem);
-
-} /* end rxrpc_proc_conns_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of conn lines
- */
-static int rxrpc_proc_conns_show(struct seq_file *m, void *v)
-{
- struct rxrpc_connection *conn;
- long timeout;
-
- conn = list_entry(v, struct rxrpc_connection, proc_link);
-
- /* display header on line 1 */
- if (v == SEQ_START_TOKEN) {
- seq_puts(m,
- "LOCAL REMOTE RPORT SRVC CONN END SERIALNO "
- "CALLNO MTU TIMEOUT"
- "\n");
- return 0;
- }
-
- /* display one conn per line on subsequent lines */
- timeout = 0;
- if (!list_empty(&conn->timeout.link))
- timeout = (long) conn->timeout.timo_jif -
- (long) jiffies;
-
- seq_printf(m,
- "%5hu %08x %5hu %04hx %08x %-3.3s %08x %08x %5Zu %8ld\n",
- conn->trans->port,
- ntohl(conn->addr.sin_addr.s_addr),
- ntohs(conn->addr.sin_port),
- ntohs(conn->service_id),
- ntohl(conn->conn_id),
- conn->out_clientflag ? "CLT" : "SRV",
- conn->serial_counter,
- conn->call_counter,
- conn->mtu_size,
- timeout
- );
-
- return 0;
-} /* end rxrpc_proc_conns_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/calls" which provides a summary of extant calls
- */
-static int rxrpc_proc_calls_open(struct inode *inode, struct file *file)
-{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &rxrpc_proc_calls_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE(inode)->data;
-
- return 0;
-} /* end rxrpc_proc_calls_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the calls list and return the
- * first item
- */
-static void *rxrpc_proc_calls_start(struct seq_file *m, loff_t *_pos)
-{
- struct list_head *_p;
- loff_t pos = *_pos;
-
- /* lock the list against modification */
- down_read(&rxrpc_calls_sem);
-
- /* allow for the header line */
- if (!pos)
- return SEQ_START_TOKEN;
- pos--;
-
- /* find the n'th element in the list */
- list_for_each(_p, &rxrpc_calls)
- if (!pos--)
- break;
-
- return _p != &rxrpc_calls ? _p : NULL;
-} /* end rxrpc_proc_calls_start() */
-
-/*****************************************************************************/
-/*
- * move to next call in calls list
- */
-static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct list_head *_p;
-
- (*pos)++;
-
- _p = v;
- _p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
-
- return _p != &rxrpc_calls ? _p : NULL;
-} /* end rxrpc_proc_calls_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the calls list
- */
-static void rxrpc_proc_calls_stop(struct seq_file *p, void *v)
-{
- up_read(&rxrpc_calls_sem);
-
-} /* end rxrpc_proc_calls_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of call lines
- */
-static int rxrpc_proc_calls_show(struct seq_file *m, void *v)
-{
- struct rxrpc_call *call = list_entry(v, struct rxrpc_call, call_link);
-
- /* display header on line 1 */
- if (v == SEQ_START_TOKEN) {
- seq_puts(m,
- "LOCAL REMOT SRVC CONN CALL DIR USE "
- " L STATE OPCODE ABORT ERRNO\n"
- );
- return 0;
- }
-
- /* display one call per line on subsequent lines */
- seq_printf(m,
- "%5hu %5hu %04hx %08x %08x %s %3u%c"
- " %c %-7.7s %6d %08x %5d\n",
- call->conn->trans->port,
- ntohs(call->conn->addr.sin_port),
- ntohs(call->conn->service_id),
- ntohl(call->conn->conn_id),
- ntohl(call->call_id),
- call->conn->service ? "SVC" : "CLT",
- atomic_read(&call->usage),
- waitqueue_active(&call->waitq) ? 'w' : ' ',
- call->app_last_rcv ? 'Y' : '-',
- (call->app_call_state!=RXRPC_CSTATE_ERROR ?
- rxrpc_call_states7[call->app_call_state] :
- rxrpc_call_error_states7[call->app_err_state]),
- call->app_opcode,
- call->app_abort_code,
- call->app_errno
- );
-
- return 0;
-} /* end rxrpc_proc_calls_show() */
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
new file mode 100644
index 000000000000..5ec705144e10
--- /dev/null
+++ b/net/rxrpc/rxkad.c
@@ -0,0 +1,1154 @@
+/* Kerberos-based RxRPC security
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <linux/ctype.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#define rxrpc_debug rxkad_debug
+#include "ar-internal.h"
+
+#define RXKAD_VERSION 2
+#define MAXKRB5TICKETLEN 1024
+#define RXKAD_TKT_TYPE_KERBEROS_V5 256
+#define ANAME_SZ 40 /* size of authentication name */
+#define INST_SZ 40 /* size of principal's instance */
+#define REALM_SZ 40 /* size of principal's auth domain */
+#define SNAME_SZ 40 /* size of service name */
+
+unsigned rxrpc_debug;
+module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(rxrpc_debug, "rxkad debugging mask");
+
+struct rxkad_level1_hdr {
+ __be32 data_size; /* true data size (excluding padding) */
+};
+
+struct rxkad_level2_hdr {
+ __be32 data_size; /* true data size (excluding padding) */
+ __be32 checksum; /* decrypted data checksum */
+};
+
+MODULE_DESCRIPTION("RxRPC network protocol type-2 security (Kerberos)");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+/*
+ * this holds a pinned cipher so that keventd doesn't get called by the cipher
+ * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
+ * packets
+ */
+static struct crypto_blkcipher *rxkad_ci;
+static DEFINE_MUTEX(rxkad_ci_mutex);
+
+/*
+ * initialise connection security
+ */
+static int rxkad_init_connection_security(struct rxrpc_connection *conn)
+{
+ struct rxrpc_key_payload *payload;
+ struct crypto_blkcipher *ci;
+ int ret;
+
+ _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
+
+ payload = conn->key->payload.data;
+ conn->security_ix = payload->k.security_index;
+
+ ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ci)) {
+ _debug("no cipher");
+ ret = PTR_ERR(ci);
+ goto error;
+ }
+
+ if (crypto_blkcipher_setkey(ci, payload->k.session_key,
+ sizeof(payload->k.session_key)) < 0)
+ BUG();
+
+ switch (conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ break;
+ case RXRPC_SECURITY_AUTH:
+ conn->size_align = 8;
+ conn->security_size = sizeof(struct rxkad_level1_hdr);
+ conn->header_size += sizeof(struct rxkad_level1_hdr);
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ conn->size_align = 8;
+ conn->security_size = sizeof(struct rxkad_level2_hdr);
+ conn->header_size += sizeof(struct rxkad_level2_hdr);
+ break;
+ default:
+ ret = -EKEYREJECTED;
+ goto error;
+ }
+
+ conn->cipher = ci;
+ ret = 0;
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * prime the encryption state with the invariant parts of a connection's
+ * description
+ */
+static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
+{
+ struct rxrpc_key_payload *payload;
+ struct blkcipher_desc desc;
+ struct scatterlist sg[2];
+ struct rxrpc_crypt iv;
+ struct {
+ __be32 x[4];
+ } tmpbuf __attribute__((aligned(16))); /* must all be in same page */
+
+ _enter("");
+
+ if (!conn->key)
+ return;
+
+ payload = conn->key->payload.data;
+ memcpy(&iv, payload->k.session_key, sizeof(iv));
+
+ desc.tfm = conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ tmpbuf.x[0] = conn->epoch;
+ tmpbuf.x[1] = conn->cid;
+ tmpbuf.x[2] = 0;
+ tmpbuf.x[3] = htonl(conn->security_ix);
+
+ memset(sg, 0, sizeof(sg));
+ sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+ sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
+ ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]);
+
+ _leave("");
+}
+
+/*
+ * partially encrypt a packet (level 1 security)
+ */
+static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 data_size,
+ void *sechdr)
+{
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[2];
+ struct {
+ struct rxkad_level1_hdr hdr;
+ __be32 first; /* first four bytes of data and padding */
+ } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ u16 check;
+
+ sp = rxrpc_skb(skb);
+
+ _enter("");
+
+ check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+ data_size |= (u32) check << 16;
+
+ tmpbuf.hdr.data_size = htonl(data_size);
+ memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
+
+ /* start the encryption afresh */
+ memset(&iv, 0, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ memset(sg, 0, sizeof(sg));
+ sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+ sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * wholly encrypt a packet (level 2 security)
+ */
+static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 data_size,
+ void *sechdr)
+{
+ const struct rxrpc_key_payload *payload;
+ struct rxkad_level2_hdr rxkhdr
+ __attribute__((aligned(8))); /* must be all on one page */
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[16];
+ struct sk_buff *trailer;
+ unsigned len;
+ u16 check;
+ int nsg;
+
+ sp = rxrpc_skb(skb);
+
+ _enter("");
+
+ check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+
+ rxkhdr.data_size = htonl(data_size | (u32) check << 16);
+ rxkhdr.checksum = 0;
+
+ /* encrypt from the session key */
+ payload = call->conn->key->payload.data;
+ memcpy(&iv, payload->k.session_key, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ memset(sg, 0, sizeof(sg[0]) * 2);
+ sg_set_buf(&sg[0], sechdr, sizeof(rxkhdr));
+ sg_set_buf(&sg[1], &rxkhdr, sizeof(rxkhdr));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr));
+
+ /* we want to encrypt the skbuff in-place */
+ nsg = skb_cow_data(skb, 0, &trailer);
+ if (nsg < 0 || nsg > 16)
+ return -ENOMEM;
+
+ len = data_size + call->conn->size_align - 1;
+ len &= ~(call->conn->size_align - 1);
+
+ skb_to_sgvec(skb, sg, 0, len);
+ crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * checksum an RxRPC packet header
+ */
+static int rxkad_secure_packet(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ size_t data_size,
+ void *sechdr)
+{
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[2];
+ struct {
+ __be32 x[2];
+ } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ __be32 x;
+ int ret;
+
+ sp = rxrpc_skb(skb);
+
+ _enter("{%d{%x}},{#%u},%zu,",
+ call->debug_id, key_serial(call->conn->key), ntohl(sp->hdr.seq),
+ data_size);
+
+ if (!call->conn->cipher)
+ return 0;
+
+ ret = key_validate(call->conn->key);
+ if (ret < 0)
+ return ret;
+
+ /* continue encrypting from where we left off */
+ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ /* calculate the security checksum */
+ x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
+ x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+ tmpbuf.x[0] = sp->hdr.callNumber;
+ tmpbuf.x[1] = x;
+
+ memset(&sg, 0, sizeof(sg));
+ sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+ sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ x = ntohl(tmpbuf.x[1]);
+ x = (x >> 16) & 0xffff;
+ if (x == 0)
+ x = 1; /* zero checksums are not permitted */
+ sp->hdr.cksum = htons(x);
+
+ switch (call->conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ ret = 0;
+ break;
+ case RXRPC_SECURITY_AUTH:
+ ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr);
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ ret = rxkad_secure_packet_encrypt(call, skb, data_size,
+ sechdr);
+ break;
+ default:
+ ret = -EPERM;
+ break;
+ }
+
+ _leave(" = %d [set %hx]", ret, x);
+ return ret;
+}
+
+/*
+ * decrypt partial encryption on a packet (level 1 security)
+ */
+static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ struct rxkad_level1_hdr sechdr;
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[2];
+ struct sk_buff *trailer;
+ u32 data_size, buf;
+ u16 check;
+
+ _enter("");
+
+ sp = rxrpc_skb(skb);
+
+ /* we want to decrypt the skbuff in-place */
+ if (skb_cow_data(skb, 0, &trailer) < 0)
+ goto nomem;
+
+ skb_to_sgvec(skb, sg, 0, 8);
+
+ /* start the decryption afresh */
+ memset(&iv, 0, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ crypto_blkcipher_decrypt_iv(&desc, sg, sg, 8);
+
+ /* remove the decrypted packet length */
+ if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
+ goto datalen_error;
+ if (!skb_pull(skb, sizeof(sechdr)))
+ BUG();
+
+ buf = ntohl(sechdr.data_size);
+ data_size = buf & 0xffff;
+
+ check = buf >> 16;
+ check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+ check &= 0xffff;
+ if (check != 0) {
+ *_abort_code = RXKADSEALEDINCON;
+ goto protocol_error;
+ }
+
+ /* shorten the packet to remove the padding */
+ if (data_size > skb->len)
+ goto datalen_error;
+ else if (data_size < skb->len)
+ skb->len = data_size;
+
+ _leave(" = 0 [dlen=%x]", data_size);
+ return 0;
+
+datalen_error:
+ *_abort_code = RXKADDATALEN;
+protocol_error:
+ _leave(" = -EPROTO");
+ return -EPROTO;
+
+nomem:
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+}
+
+/*
+ * wholly decrypt a packet (level 2 security)
+ */
+static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ const struct rxrpc_key_payload *payload;
+ struct rxkad_level2_hdr sechdr;
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist _sg[4], *sg;
+ struct sk_buff *trailer;
+ u32 data_size, buf;
+ u16 check;
+ int nsg;
+
+ _enter(",{%d}", skb->len);
+
+ sp = rxrpc_skb(skb);
+
+ /* we want to decrypt the skbuff in-place */
+ nsg = skb_cow_data(skb, 0, &trailer);
+ if (nsg < 0)
+ goto nomem;
+
+ sg = _sg;
+ if (unlikely(nsg > 4)) {
+ sg = kmalloc(sizeof(*sg) * nsg, GFP_NOIO);
+ if (!sg)
+ goto nomem;
+ }
+
+ skb_to_sgvec(skb, sg, 0, skb->len);
+
+ /* decrypt from the session key */
+ payload = call->conn->key->payload.data;
+ memcpy(&iv, payload->k.session_key, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ crypto_blkcipher_decrypt_iv(&desc, sg, sg, skb->len);
+ if (sg != _sg)
+ kfree(sg);
+
+ /* remove the decrypted packet length */
+ if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
+ goto datalen_error;
+ if (!skb_pull(skb, sizeof(sechdr)))
+ BUG();
+
+ buf = ntohl(sechdr.data_size);
+ data_size = buf & 0xffff;
+
+ check = buf >> 16;
+ check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+ check &= 0xffff;
+ if (check != 0) {
+ *_abort_code = RXKADSEALEDINCON;
+ goto protocol_error;
+ }
+
+ /* shorten the packet to remove the padding */
+ if (data_size > skb->len)
+ goto datalen_error;
+ else if (data_size < skb->len)
+ skb->len = data_size;
+
+ _leave(" = 0 [dlen=%x]", data_size);
+ return 0;
+
+datalen_error:
+ *_abort_code = RXKADDATALEN;
+protocol_error:
+ _leave(" = -EPROTO");
+ return -EPROTO;
+
+nomem:
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+}
+
+/*
+ * verify the security on a received packet
+ */
+static int rxkad_verify_packet(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ struct blkcipher_desc desc;
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[2];
+ struct {
+ __be32 x[2];
+ } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ __be32 x;
+ __be16 cksum;
+ int ret;
+
+ sp = rxrpc_skb(skb);
+
+ _enter("{%d{%x}},{#%u}",
+ call->debug_id, key_serial(call->conn->key),
+ ntohl(sp->hdr.seq));
+
+ if (!call->conn->cipher)
+ return 0;
+
+ if (sp->hdr.securityIndex != 2) {
+ *_abort_code = RXKADINCONSISTENCY;
+ _leave(" = -EPROTO [not rxkad]");
+ return -EPROTO;
+ }
+
+ /* continue encrypting from where we left off */
+ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ /* validate the security checksum */
+ x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
+ x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+ tmpbuf.x[0] = call->call_id;
+ tmpbuf.x[1] = x;
+
+ memset(&sg, 0, sizeof(sg));
+ sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+ sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ x = ntohl(tmpbuf.x[1]);
+ x = (x >> 16) & 0xffff;
+ if (x == 0)
+ x = 1; /* zero checksums are not permitted */
+
+ cksum = htons(x);
+ if (sp->hdr.cksum != cksum) {
+ *_abort_code = RXKADSEALEDINCON;
+ _leave(" = -EPROTO [csum failed]");
+ return -EPROTO;
+ }
+
+ switch (call->conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ ret = 0;
+ break;
+ case RXRPC_SECURITY_AUTH:
+ ret = rxkad_verify_packet_auth(call, skb, _abort_code);
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ ret = rxkad_verify_packet_encrypt(call, skb, _abort_code);
+ break;
+ default:
+ ret = -ENOANO;
+ break;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * issue a challenge
+ * - checks the connection's key with key_validate(), picks a fresh random
+ *   nonce and sends a CHALLENGE packet (Rx header followed by the rxkad
+ *   challenge body) to the connection's peer
+ * - returns 0 on success, the key_validate() error if the key check fails,
+ *   or -EAGAIN if kernel_sendmsg() fails
+ */
+static int rxkad_issue_challenge(struct rxrpc_connection *conn)
+{
+ struct rxkad_challenge challenge;
+ struct rxrpc_header hdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t len;
+ int ret;
+
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+ ret = key_validate(conn->key);
+ if (ret < 0)
+ return ret;
+
+ /* the nonce is stored on the connection; rxkad_verify_response() later
+ * compares the peer's answer against security_nonce + 1 */
+ get_random_bytes(&conn->security_nonce, sizeof(conn->security_nonce));
+
+ /* the challenge body is sent in network byte order */
+ challenge.version = htonl(2);
+ challenge.nonce = htonl(conn->security_nonce);
+ challenge.min_level = htonl(0);
+ challenge.__padding = 0;
+
+ /* address the datagram to the connection's peer; no control data */
+ msg.msg_name = &conn->trans->peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ /* build the Rx header; call number and sequence number are zero in a
+ * CHALLENGE packet as built here */
+ hdr.epoch = conn->epoch;
+ hdr.cid = conn->cid;
+ hdr.callNumber = 0;
+ hdr.seq = 0;
+ hdr.type = RXRPC_PACKET_TYPE_CHALLENGE;
+ hdr.flags = conn->out_clientflag;
+ hdr.userStatus = 0;
+ hdr.securityIndex = conn->security_ix;
+ hdr._rsvd = 0;
+ hdr.serviceId = conn->service_id;
+
+ /* two segments: the Rx header, then the challenge body */
+ iov[0].iov_base = &hdr;
+ iov[0].iov_len = sizeof(hdr);
+ iov[1].iov_base = &challenge;
+ iov[1].iov_len = sizeof(challenge);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ /* the serial number is taken last, immediately before transmission */
+ hdr.serial = htonl(atomic_inc_return(&conn->serial));
+ _proto("Tx CHALLENGE %%%u", ntohl(hdr.serial));
+
+ ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
+ if (ret < 0) {
+ /* any transmission failure is reported to the caller as -EAGAIN */
+ _debug("sendmsg failed: %d", ret);
+ return -EAGAIN;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * send a Kerberos security response
+ * - transmits three segments to the connection's peer: the Rx header, the
+ *   response body and the ticket held in the key (s2->ticket)
+ * - hdr is modified in place: epoch, seq, type, flags, userStatus, _rsvd and
+ *   serial are overwritten; every other header field is sent as passed in
+ * - returns 0 on success or -EAGAIN if kernel_sendmsg() fails
+ */
+static int rxkad_send_response(struct rxrpc_connection *conn,
+ struct rxrpc_header *hdr,
+ struct rxkad_response *resp,
+ const struct rxkad_key *s2)
+{
+ struct msghdr msg;
+ struct kvec iov[3];
+ size_t len;
+ int ret;
+
+ _enter("");
+
+ /* address the datagram to the connection's peer; no control data */
+ msg.msg_name = &conn->trans->peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ /* turn the supplied header into a RESPONSE header */
+ hdr->epoch = conn->epoch;
+ hdr->seq = 0;
+ hdr->type = RXRPC_PACKET_TYPE_RESPONSE;
+ hdr->flags = conn->out_clientflag;
+ hdr->userStatus = 0;
+ hdr->_rsvd = 0;
+
+ iov[0].iov_base = hdr;
+ iov[0].iov_len = sizeof(*hdr);
+ iov[1].iov_base = resp;
+ iov[1].iov_len = sizeof(*resp);
+ /* the cast only drops const for the kvec; the ticket is read, not written,
+ * by this function */
+ iov[2].iov_base = (void *) s2->ticket;
+ iov[2].iov_len = s2->ticket_len;
+
+ len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
+
+ /* the serial number is taken last, immediately before transmission */
+ hdr->serial = htonl(atomic_inc_return(&conn->serial));
+ _proto("Tx RESPONSE %%%u", ntohl(hdr->serial));
+
+ ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len);
+ if (ret < 0) {
+ /* any transmission failure is reported to the caller as -EAGAIN */
+ _debug("sendmsg failed: %d", ret);
+ return -EAGAIN;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * calculate the response checksum
+ * - folds every byte of the response structure (including whatever is
+ *   currently in the checksum field) into a 32-bit multiplicative sum and
+ *   stores the result, in network byte order, in response->encrypted.checksum
+ */
+static void rxkad_calc_response_checksum(struct rxkad_response *response)
+{
+	u8 *byte = (u8 *) response;
+	u8 *limit = byte + sizeof(*response);
+	u32 sum = 1000003;
+
+	/* sum = sum * K + next byte, with 32-bit wraparound */
+	while (byte < limit) {
+		sum *= 0x10204081;
+		sum += *byte++;
+	}
+
+	response->encrypted.checksum = htonl(sum);
+}
+
+/*
+ * load a scatterlist with a potentially split-page buffer
+ * - sg must point to an array of two entries; both are cleared first
+ * - sg[0] describes the whole buffer unless it runs past the end of its
+ *   page, in which case sg[0] is trimmed to the end of the page and sg[1]
+ *   describes the remainder
+ */
+static void rxkad_sg_set_buf2(struct scatterlist sg[2],
+			      void *buf, size_t buflen)
+{
+	/* sg is a pointer here (array parameters decay), so sizeof(sg) would
+	 * only cover a pointer's worth of bytes; clear both entries
+	 * explicitly so that sg[1].length is zero when it isn't used */
+	memset(sg, 0, 2 * sizeof(sg[0]));
+
+	sg_set_buf(&sg[0], buf, buflen);
+	if (sg[0].offset + buflen > PAGE_SIZE) {
+		/* the buffer was split over two pages */
+		sg[0].length = PAGE_SIZE - sg[0].offset;
+		sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length);
+	}
+
+	ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
+}
+
+/*
+ * encrypt the response packet
+ * - encrypts resp->encrypted in place using the connection's cipher
+ *   (conn->cipher), with the IV initialised from the session key in s2
+ * - the return value of the cipher call is not checked
+ */
+static void rxkad_encrypt_response(struct rxrpc_connection *conn,
+ struct rxkad_response *resp,
+ const struct rxkad_key *s2)
+{
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist ssg[2], dsg[2];
+
+ /* continue encrypting from where we left off */
+ /* NOTE(review): the IV used below is a copy of the session key, taken
+ * afresh on each call - confirm the comment above is still accurate */
+ memcpy(&iv, s2->session_key, sizeof(iv));
+ desc.tfm = conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ /* source and destination scatterlists describe the same memory, so the
+ * encryption is done in place */
+ rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted));
+ memcpy(dsg, ssg, sizeof(dsg));
+ crypto_blkcipher_encrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted));
+}
+
+/*
+ * respond to a challenge packet
+ * - extracts the challenge from the start of the skb, checks its version and
+ *   that our security level satisfies the requested minimum, then builds,
+ *   checksums, encrypts and transmits the response
+ * - returns the result of rxkad_send_response() on the normal path
+ * - returns -EPROTO with *_abort_code set if the challenge is short, has the
+ *   wrong version or demands a higher security level than we offer
+ * - returns -EPROTO (abort code untouched) if the connection has no key, or
+ *   the key_validate() error with *_abort_code = RXKADEXPIRED if the key is
+ *   not usable
+ */
+static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
+				      struct sk_buff *skb,
+				      u32 *_abort_code)
+{
+	const struct rxrpc_key_payload *payload;
+	struct rxkad_challenge challenge;
+	struct rxkad_response resp
+		__attribute__((aligned(8))); /* must be aligned for crypto */
+	struct rxrpc_skb_priv *sp;
+	u32 version, nonce, min_level, abort_code;
+	int ret;
+
+	_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+	if (!conn->key) {
+		_leave(" = -EPROTO [no key]");
+		return -EPROTO;
+	}
+
+	ret = key_validate(conn->key);
+	if (ret < 0) {
+		*_abort_code = RXKADEXPIRED;
+		return ret;
+	}
+
+	/* abort_code always holds the code for the check about to be made */
+	abort_code = RXKADPACKETSHORT;
+	sp = rxrpc_skb(skb);
+	if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0)
+		goto protocol_error;
+
+	version = ntohl(challenge.version);
+	nonce = ntohl(challenge.nonce);
+	min_level = ntohl(challenge.min_level);
+
+	_proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
+	       ntohl(sp->hdr.serial), version, nonce, min_level);
+
+	abort_code = RXKADINCONSISTENCY;
+	if (version != RXKAD_VERSION)
+		goto protocol_error;
+
+	abort_code = RXKADLEVELFAIL;
+	if (conn->security_level < min_level)
+		goto protocol_error;
+
+	payload = conn->key->payload.data;
+
+	/* build the response packet */
+	memset(&resp, 0, sizeof(resp));
+
+	/* the version travels in network byte order like the other 32-bit
+	 * fields: rxkad_verify_response() reads it back with ntohl() */
+	resp.version = htonl(RXKAD_VERSION);
+	resp.encrypted.epoch = conn->epoch;
+	resp.encrypted.cid = conn->cid;
+	resp.encrypted.securityIndex = htonl(conn->security_ix);
+	resp.encrypted.call_id[0] =
+		(conn->channels[0] ? conn->channels[0]->call_id : 0);
+	resp.encrypted.call_id[1] =
+		(conn->channels[1] ? conn->channels[1]->call_id : 0);
+	resp.encrypted.call_id[2] =
+		(conn->channels[2] ? conn->channels[2]->call_id : 0);
+	resp.encrypted.call_id[3] =
+		(conn->channels[3] ? conn->channels[3]->call_id : 0);
+	/* prove we saw the challenge by returning its nonce plus one */
+	resp.encrypted.inc_nonce = htonl(nonce + 1);
+	resp.encrypted.level = htonl(conn->security_level);
+	resp.kvno = htonl(payload->k.kvno);
+	resp.ticket_len = htonl(payload->k.ticket_len);
+
+	/* calculate the response checksum and then do the encryption */
+	rxkad_calc_response_checksum(&resp);
+	rxkad_encrypt_response(conn, &resp, &payload->k);
+	/* the received packet's header is reused as the response header */
+	return rxkad_send_response(conn, &sp->hdr, &resp, &payload->k);
+
+protocol_error:
+	*_abort_code = abort_code;
+	_leave(" = -EPROTO [%d]", abort_code);
+	return -EPROTO;
+}
+
+/*
+ * decrypt the kerberos IV ticket in the response
+ * - decrypts the ticket in place using the cipher attached to the
+ *   connection's server key, then walks the plaintext in this order:
+ *   flags byte, three NUL-terminated printable strings (name, instance,
+ *   realm), 4-byte IPv4 address, 8-byte session key, 1-byte lifetime (in
+ *   units of 5 minutes), 4-byte issue time, then two more strings (service
+ *   name and service instance)
+ * - on success returns 0 with the session key copied to *_session_key and
+ *   *_expiry set to issue time + lifetime
+ * - on failure returns a negative error and sets *_abort_code; *_expiry is
+ *   left at 0 unless the failure is in the trailing service strings
+ */
+static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
+ void *ticket, size_t ticket_len,
+ struct rxrpc_crypt *_session_key,
+ time_t *_expiry,
+ u32 *_abort_code)
+{
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv, key;
+ struct scatterlist ssg[1], dsg[1];
+ struct in_addr addr;
+ unsigned life;
+ time_t issue, now;
+ bool little_endian;
+ int ret;
+ u8 *p, *q, *name, *end;
+
+ _enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key));
+
+ *_expiry = 0;
+
+ /* map a server key failure onto an rxkad abort code */
+ ret = key_validate(conn->server_key);
+ if (ret < 0) {
+ switch (ret) {
+ case -EKEYEXPIRED:
+ *_abort_code = RXKADEXPIRED;
+ goto error;
+ default:
+ *_abort_code = RXKADNOAUTH;
+ goto error;
+ }
+ }
+
+ ASSERT(conn->server_key->payload.data != NULL);
+ /* the ticket buffer must be 8-byte aligned */
+ ASSERTCMP((unsigned long) ticket & 7UL, ==, 0);
+
+ /* the IV comes from the server key's type_data and the cipher from its
+ * payload */
+ memcpy(&iv, &conn->server_key->type_data, sizeof(iv));
+
+ desc.tfm = conn->server_key->payload.data;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ /* source and destination are the same buffer: decrypt in place */
+ sg_init_one(&ssg[0], ticket, ticket_len);
+ memcpy(dsg, ssg, sizeof(dsg));
+ crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, ticket_len);
+
+ p = ticket;
+ end = p + ticket_len;
+
+/* Z(size): consume a NUL-terminated string at p and yield a pointer to its
+ * start, leaving p just past the terminator; jumps to bad_ticket if no
+ * terminator is found before end, if the string is longer than size bytes or
+ * if it contains a non-printable character */
+#define Z(size) \
+ ({ \
+ u8 *__str = p; \
+ q = memchr(p, 0, end - p); \
+ if (!q || q - p > (size)) \
+ goto bad_ticket; \
+ for (; p < q; p++) \
+ if (!isprint(*p)) \
+ goto bad_ticket; \
+ p++; \
+ __str; \
+ })
+
+ /* extract the ticket flags */
+ /* bit 0 of the flags selects the byte order of the issue time below */
+ _debug("KIV FLAGS: %x", *p);
+ little_endian = *p & 1;
+ p++;
+
+ /* extract the authentication name */
+ name = Z(ANAME_SZ);
+ _debug("KIV ANAME: %s", name);
+
+ /* extract the principal's instance */
+ name = Z(INST_SZ);
+ _debug("KIV INST : %s", name);
+
+ /* extract the principal's authentication domain */
+ name = Z(REALM_SZ);
+ _debug("KIV REALM: %s", name);
+
+ /* make sure enough data remains for the fixed-size fields that follow
+ * NOTE(review): the fields read below are 4 + 8 + 1 + 4 bytes; the bound
+ * used here presumably also allows for part of the trailing strings -
+ * confirm */
+ if (end - p < 4 + 8 + 4 + 2)
+ goto bad_ticket;
+
+ /* get the IPv4 address of the entity that requested the ticket */
+ memcpy(&addr, p, sizeof(addr));
+ p += 4;
+ _debug("KIV ADDR : "NIPQUAD_FMT, NIPQUAD(addr));
+
+ /* get the session key from the ticket */
+ memcpy(&key, p, sizeof(key));
+ p += 8;
+ _debug("KIV KEY : %08x %08x", ntohl(key.n[0]), ntohl(key.n[1]));
+ memcpy(_session_key, &key, sizeof(key));
+
+ /* get the ticket's lifetime */
+ /* one byte, in units of five minutes, converted to seconds */
+ life = *p++ * 5 * 60;
+ _debug("KIV LIFE : %u", life);
+
+ /* get the issue time of the ticket */
+ /* copied out with memcpy as p is not necessarily aligned */
+ if (little_endian) {
+ __le32 stamp;
+ memcpy(&stamp, p, 4);
+ issue = le32_to_cpu(stamp);
+ } else {
+ __be32 stamp;
+ memcpy(&stamp, p, 4);
+ issue = be32_to_cpu(stamp);
+ }
+ p += 4;
+ now = xtime.tv_sec;
+ _debug("KIV ISSUE: %lx [%lx]", issue, now);
+
+ /* check the ticket is in date */
+ /* a ticket issued in the future is rejected outright */
+ if (issue > now) {
+ *_abort_code = RXKADNOAUTH;
+ ret = -EKEYREJECTED;
+ goto error;
+ }
+
+ /* a ticket whose lifetime has already run out has expired */
+ if (issue < now - life) {
+ *_abort_code = RXKADEXPIRED;
+ ret = -EKEYEXPIRED;
+ goto error;
+ }
+
+ *_expiry = issue + life;
+
+ /* get the service name */
+ name = Z(SNAME_SZ);
+ _debug("KIV SNAME: %s", name);
+
+ /* get the service instance name */
+ name = Z(INST_SZ);
+ _debug("KIV SINST: %s", name);
+
+ ret = 0;
+ /* common exit for both the success and the failure paths */
+error:
+ _leave(" = %d", ret);
+ return ret;
+
+ /* malformed ticket contents: target of the Z() macro and length check */
+bad_ticket:
+ *_abort_code = RXKADBADTICKET;
+ ret = -EBADMSG;
+ goto error;
+}
+
+/*
+ * decrypt the response packet
+ * - decrypts resp->encrypted in place with the module-wide cipher rxkad_ci,
+ *   keyed with the given session key, which is also used as the IV
+ * - rxkad_ci is shared, so the set-key and decrypt steps are done under
+ *   rxkad_ci_mutex
+ * - conn is not referenced in the body
+ */
+static void rxkad_decrypt_response(struct rxrpc_connection *conn,
+ struct rxkad_response *resp,
+ const struct rxrpc_crypt *session_key)
+{
+ struct blkcipher_desc desc;
+ struct scatterlist ssg[2], dsg[2];
+ struct rxrpc_crypt iv;
+
+ _enter(",,%08x%08x",
+ ntohl(session_key->n[0]), ntohl(session_key->n[1]));
+
+ ASSERT(rxkad_ci != NULL);
+
+ /* hold the mutex from setting the key until decryption has finished */
+ mutex_lock(&rxkad_ci_mutex);
+ /* failure to set the key is treated as fatal */
+ if (crypto_blkcipher_setkey(rxkad_ci, session_key->x,
+ sizeof(*session_key)) < 0)
+ BUG();
+
+ /* work on a copy of the key as the IV so that session_key itself is
+ * not written to */
+ memcpy(&iv, session_key, sizeof(iv));
+ desc.tfm = rxkad_ci;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ /* source and destination scatterlists describe the same memory, so the
+ * decryption is done in place */
+ rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted));
+ memcpy(dsg, ssg, sizeof(dsg));
+ crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted));
+ mutex_unlock(&rxkad_ci_mutex);
+
+ _leave("");
+}
+
+/*
+ * verify a response
+ * - pulls the response header off the front of the skb, checks the version,
+ *   ticket length and key version number, then copies out the ticket and
+ *   has it decrypted and decoded
+ * - the session key from the ticket is used to decrypt the sealed part of
+ *   the response, which is then checked against the connection (epoch, cid,
+ *   security index, checksum, call IDs, nonce + 1, security level)
+ * - on success the connection's security level is set from the response and
+ *   a server data key is created from the session key; returns 0
+ * - returns -EPROTO with *_abort_code set on any protocol violation,
+ *   -ENOMEM if the ticket buffer can't be allocated, or the error from
+ *   rxkad_decrypt_ticket() / rxrpc_get_server_data_key()
+ */
+static int rxkad_verify_response(struct rxrpc_connection *conn,
+				 struct sk_buff *skb,
+				 u32 *_abort_code)
+{
+	struct rxkad_response response
+		__attribute__((aligned(8))); /* must be aligned for crypto */
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_crypt session_key;
+	time_t expiry;
+	void *ticket;
+	u32 abort_code, version, kvno, ticket_len, csum, level;
+	int ret;
+
+	_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
+
+	/* abort_code always holds the code for the check about to be made */
+	abort_code = RXKADPACKETSHORT;
+	if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0)
+		goto protocol_error;
+	/* the copy above succeeded, so the pull cannot legitimately fail */
+	if (!pskb_pull(skb, sizeof(response)))
+		BUG();
+
+	version = ntohl(response.version);
+	ticket_len = ntohl(response.ticket_len);
+	kvno = ntohl(response.kvno);
+	sp = rxrpc_skb(skb);
+	_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
+	       ntohl(sp->hdr.serial), version, kvno, ticket_len);
+
+	/* reject responses of the wrong protocol version; without the goto
+	 * this test would merely guard the next assignment and never fail */
+	abort_code = RXKADINCONSISTENCY;
+	if (version != RXKAD_VERSION)
+		goto protocol_error;
+
+	abort_code = RXKADTICKETLEN;
+	if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
+		goto protocol_error;
+
+	abort_code = RXKADUNKNOWNKEY;
+	if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
+		goto protocol_error;
+
+	/* extract the kerberos ticket and decrypt and decode it */
+	ticket = kmalloc(ticket_len, GFP_NOFS);
+	if (!ticket)
+		return -ENOMEM;
+
+	abort_code = RXKADPACKETSHORT;
+	if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0)
+		goto protocol_error_free;
+
+	/* on failure the ticket decoder supplies both the error and the
+	 * abort code */
+	ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
+				   &expiry, &abort_code);
+	if (ret < 0) {
+		*_abort_code = abort_code;
+		kfree(ticket);
+		return ret;
+	}
+
+	/* use the session key from inside the ticket to decrypt the
+	 * response */
+	rxkad_decrypt_response(conn, &response, &session_key);
+
+	abort_code = RXKADSEALEDINCON;
+	if (response.encrypted.epoch != conn->epoch)
+		goto protocol_error_free;
+	if (response.encrypted.cid != conn->cid)
+		goto protocol_error_free;
+	if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
+		goto protocol_error_free;
+	/* the checksum is computed over the structure with the checksum
+	 * field zeroed, so save the received value, clear the field and
+	 * recompute */
+	csum = response.encrypted.checksum;
+	response.encrypted.checksum = 0;
+	rxkad_calc_response_checksum(&response);
+	if (response.encrypted.checksum != csum)
+		goto protocol_error_free;
+
+	if (ntohl(response.encrypted.call_id[0]) > INT_MAX ||
+	    ntohl(response.encrypted.call_id[1]) > INT_MAX ||
+	    ntohl(response.encrypted.call_id[2]) > INT_MAX ||
+	    ntohl(response.encrypted.call_id[3]) > INT_MAX)
+		goto protocol_error_free;
+
+	/* the peer must return the nonce we issued, plus one */
+	abort_code = RXKADOUTOFSEQUENCE;
+	if (response.encrypted.inc_nonce != htonl(conn->security_nonce + 1))
+		goto protocol_error_free;
+
+	abort_code = RXKADLEVELFAIL;
+	level = ntohl(response.encrypted.level);
+	if (level > RXRPC_SECURITY_ENCRYPT)
+		goto protocol_error_free;
+	conn->security_level = level;
+
+	/* create a key to hold the security data and expiration time - after
+	 * this the connection security can be handled in exactly the same way
+	 * as for a client connection */
+	ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
+	if (ret < 0) {
+		kfree(ticket);
+		return ret;
+	}
+
+	kfree(ticket);
+	_leave(" = 0");
+	return 0;
+
+protocol_error_free:
+	kfree(ticket);
+protocol_error:
+	*_abort_code = abort_code;
+	_leave(" = -EPROTO [%d]", abort_code);
+	return -EPROTO;
+}
+
+/*
+ * clear the connection security
+ * - releases the connection's cipher, if one was allocated
+ */
+static void rxkad_clear(struct rxrpc_connection *conn)
+{
+	_enter("");
+
+	/* nothing to release if no cipher is attached */
+	if (!conn->cipher)
+		return;
+
+	crypto_free_blkcipher(conn->cipher);
+}
+
+/*
+ * RxRPC Kerberos-based security
+ * - operations table handed to rxrpc_register_security() by rxkad_init() and
+ *   to rxrpc_unregister_security() by rxkad_exit()
+ * - the security index is RXKAD_VERSION
+ */
+static struct rxrpc_security rxkad = {
+ .owner = THIS_MODULE,
+ .name = "rxkad",
+ .security_index = RXKAD_VERSION,
+ .init_connection_security = rxkad_init_connection_security,
+ .prime_packet_security = rxkad_prime_packet_security,
+ .secure_packet = rxkad_secure_packet,
+ .verify_packet = rxkad_verify_packet,
+ .issue_challenge = rxkad_issue_challenge,
+ .respond_to_challenge = rxkad_respond_to_challenge,
+ .verify_response = rxkad_verify_response,
+ .clear = rxkad_clear,
+};
+
+/*
+ * initialise the rxkad security module
+ * - allocates the module-wide "pcbc(fcrypt)" cipher and registers the rxkad
+ *   security operations
+ * - returns 0 on success, the cipher allocation error, or the registration
+ *   error (in which case the cipher is released again)
+ */
+static __init int rxkad_init(void)
+{
+	int ret;
+
+	_enter("");
+
+	/* pin the cipher we need so that the crypto layer doesn't invoke
+	 * keventd to go get it */
+	rxkad_ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(rxkad_ci))
+		return PTR_ERR(rxkad_ci);
+
+	/* don't leak the pinned cipher if registration fails: the exit
+	 * handler never runs when module initialisation fails */
+	ret = rxrpc_register_security(&rxkad);
+	if (ret < 0)
+		crypto_free_blkcipher(rxkad_ci);
+
+	return ret;
+}
+
+module_init(rxkad_init);
+
+/*
+ * clean up the rxkad security module
+ * - unregisters the rxkad security operations and then releases the
+ *   module-wide cipher allocated by rxkad_init()
+ */
+static __exit void rxkad_exit(void)
+{
+ _enter("");
+
+ /* unregister first, then free the shared cipher */
+ rxrpc_unregister_security(&rxkad);
+ crypto_free_blkcipher(rxkad_ci);
+}
+
+module_exit(rxkad_exit);
diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c
deleted file mode 100644
index 9896fd87a4d4..000000000000
--- a/net/rxrpc/rxrpc_syms.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* rxrpc_syms.c: exported Rx RPC layer interface symbols
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/krxiod.h>
-
-/* call.c */
-EXPORT_SYMBOL(rxrpc_create_call);
-EXPORT_SYMBOL(rxrpc_put_call);
-EXPORT_SYMBOL(rxrpc_call_abort);
-EXPORT_SYMBOL(rxrpc_call_read_data);
-EXPORT_SYMBOL(rxrpc_call_write_data);
-
-/* connection.c */
-EXPORT_SYMBOL(rxrpc_create_connection);
-EXPORT_SYMBOL(rxrpc_put_connection);
-
-/* transport.c */
-EXPORT_SYMBOL(rxrpc_create_transport);
-EXPORT_SYMBOL(rxrpc_put_transport);
-EXPORT_SYMBOL(rxrpc_add_service);
-EXPORT_SYMBOL(rxrpc_del_service);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
deleted file mode 100644
index 884290754af7..000000000000
--- a/net/rxrpc/sysctl.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/* sysctl.c: Rx RPC control
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/sysctl.h>
-#include <rxrpc/types.h>
-#include <rxrpc/rxrpc.h>
-#include <asm/errno.h>
-#include "internal.h"
-
-int rxrpc_ktrace;
-int rxrpc_kdebug;
-int rxrpc_kproto;
-int rxrpc_knet;
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *rxrpc_sysctl = NULL;
-
-static ctl_table rxrpc_sysctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "kdebug",
- .data = &rxrpc_kdebug,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 2,
- .procname = "ktrace",
- .data = &rxrpc_ktrace,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 3,
- .procname = "kproto",
- .data = &rxrpc_kproto,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "knet",
- .data = &rxrpc_knet,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "peertimo",
- .data = &rxrpc_peer_timeout,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = &proc_doulongvec_minmax
- },
- {
- .ctl_name = 6,
- .procname = "conntimo",
- .data = &rxrpc_conn_timeout,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = &proc_doulongvec_minmax
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table rxrpc_dir_sysctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "rxrpc",
- .maxlen = 0,
- .mode = 0555,
- .child = rxrpc_sysctl_table
- },
- { .ctl_name = 0 }
-};
-#endif /* CONFIG_SYSCTL */
-
-/*****************************************************************************/
-/*
- * initialise the sysctl stuff for Rx RPC
- */
-int rxrpc_sysctl_init(void)
-{
-#ifdef CONFIG_SYSCTL
- rxrpc_sysctl = register_sysctl_table(rxrpc_dir_sysctl_table);
- if (!rxrpc_sysctl)
- return -ENOMEM;
-#endif /* CONFIG_SYSCTL */
-
- return 0;
-} /* end rxrpc_sysctl_init() */
-
-/*****************************************************************************/
-/*
- * clean up the sysctl stuff for Rx RPC
- */
-void rxrpc_sysctl_cleanup(void)
-{
-#ifdef CONFIG_SYSCTL
- if (rxrpc_sysctl) {
- unregister_sysctl_table(rxrpc_sysctl);
- rxrpc_sysctl = NULL;
- }
-#endif /* CONFIG_SYSCTL */
-
-} /* end rxrpc_sysctl_cleanup() */
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
deleted file mode 100644
index 8e57be2df936..000000000000
--- a/net/rxrpc/transport.c
+++ /dev/null
@@ -1,846 +0,0 @@
-/* transport.c: Rx Transport routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/krxsecd.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-#include <linux/ipv6.h> /* this should _really_ be in errqueue.h.. */
-#endif
-#include <linux/errqueue.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-struct errormsg {
- struct cmsghdr cmsg; /* control message header */
- struct sock_extended_err ee; /* extended error information */
- struct sockaddr_in icmp_src; /* ICMP packet source address */
-};
-
-static DEFINE_SPINLOCK(rxrpc_transports_lock);
-static struct list_head rxrpc_transports = LIST_HEAD_INIT(rxrpc_transports);
-
-__RXACCT_DECL(atomic_t rxrpc_transport_count);
-LIST_HEAD(rxrpc_proc_transports);
-DECLARE_RWSEM(rxrpc_proc_transports_sem);
-
-static void rxrpc_data_ready(struct sock *sk, int count);
-static void rxrpc_error_report(struct sock *sk);
-static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
- struct list_head *msgq);
-static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans);
-
-/*****************************************************************************/
-/*
- * create a new transport endpoint using the specified UDP port
- */
-int rxrpc_create_transport(unsigned short port,
- struct rxrpc_transport **_trans)
-{
- struct rxrpc_transport *trans;
- struct sockaddr_in sin;
- mm_segment_t oldfs;
- struct sock *sock;
- int ret, opt;
-
- _enter("%hu", port);
-
- trans = kzalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
- if (!trans)
- return -ENOMEM;
-
- atomic_set(&trans->usage, 1);
- INIT_LIST_HEAD(&trans->services);
- INIT_LIST_HEAD(&trans->link);
- INIT_LIST_HEAD(&trans->krxiodq_link);
- spin_lock_init(&trans->lock);
- INIT_LIST_HEAD(&trans->peer_active);
- INIT_LIST_HEAD(&trans->peer_graveyard);
- spin_lock_init(&trans->peer_gylock);
- init_waitqueue_head(&trans->peer_gy_waitq);
- rwlock_init(&trans->peer_lock);
- atomic_set(&trans->peer_count, 0);
- trans->port = port;
-
- /* create a UDP socket to be my actual transport endpoint */
- ret = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &trans->socket);
- if (ret < 0)
- goto error;
-
- /* use the specified port */
- if (port) {
- memset(&sin, 0, sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_port = htons(port);
- ret = trans->socket->ops->bind(trans->socket,
- (struct sockaddr *) &sin,
- sizeof(sin));
- if (ret < 0)
- goto error;
- }
-
- opt = 1;
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- ret = trans->socket->ops->setsockopt(trans->socket, SOL_IP, IP_RECVERR,
- (char *) &opt, sizeof(opt));
- set_fs(oldfs);
-
- spin_lock(&rxrpc_transports_lock);
- list_add(&trans->link, &rxrpc_transports);
- spin_unlock(&rxrpc_transports_lock);
-
- /* set the socket up */
- sock = trans->socket->sk;
- sock->sk_user_data = trans;
- sock->sk_data_ready = rxrpc_data_ready;
- sock->sk_error_report = rxrpc_error_report;
-
- down_write(&rxrpc_proc_transports_sem);
- list_add_tail(&trans->proc_link, &rxrpc_proc_transports);
- up_write(&rxrpc_proc_transports_sem);
-
- __RXACCT(atomic_inc(&rxrpc_transport_count));
-
- *_trans = trans;
- _leave(" = 0 (%p)", trans);
- return 0;
-
- error:
- /* finish cleaning up the transport (not really needed here, but...) */
- if (trans->socket)
- trans->socket->ops->shutdown(trans->socket, 2);
-
- /* close the socket */
- if (trans->socket) {
- trans->socket->sk->sk_user_data = NULL;
- sock_release(trans->socket);
- trans->socket = NULL;
- }
-
- kfree(trans);
-
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_create_transport() */
-
-/*****************************************************************************/
-/*
- * destroy a transport endpoint
- */
-void rxrpc_put_transport(struct rxrpc_transport *trans)
-{
- _enter("%p{u=%d p=%hu}",
- trans, atomic_read(&trans->usage), trans->port);
-
- BUG_ON(atomic_read(&trans->usage) <= 0);
-
- /* to prevent a race, the decrement and the dequeue must be
- * effectively atomic */
- spin_lock(&rxrpc_transports_lock);
- if (likely(!atomic_dec_and_test(&trans->usage))) {
- spin_unlock(&rxrpc_transports_lock);
- _leave("");
- return;
- }
-
- list_del(&trans->link);
- spin_unlock(&rxrpc_transports_lock);
-
- /* finish cleaning up the transport */
- if (trans->socket)
- trans->socket->ops->shutdown(trans->socket, 2);
-
- rxrpc_krxsecd_clear_transport(trans);
- rxrpc_krxiod_dequeue_transport(trans);
-
- /* discard all peer information */
- rxrpc_peer_clearall(trans);
-
- down_write(&rxrpc_proc_transports_sem);
- list_del(&trans->proc_link);
- up_write(&rxrpc_proc_transports_sem);
- __RXACCT(atomic_dec(&rxrpc_transport_count));
-
- /* close the socket */
- if (trans->socket) {
- trans->socket->sk->sk_user_data = NULL;
- sock_release(trans->socket);
- trans->socket = NULL;
- }
-
- kfree(trans);
-
- _leave("");
-} /* end rxrpc_put_transport() */
-
-/*****************************************************************************/
-/*
- * add a service to a transport to be listened upon
- */
-int rxrpc_add_service(struct rxrpc_transport *trans,
- struct rxrpc_service *newsrv)
-{
- struct rxrpc_service *srv;
- struct list_head *_p;
- int ret = -EEXIST;
-
- _enter("%p{%hu},%p{%hu}",
- trans, trans->port, newsrv, newsrv->service_id);
-
- /* verify that the service ID is not already present */
- spin_lock(&trans->lock);
-
- list_for_each(_p, &trans->services) {
- srv = list_entry(_p, struct rxrpc_service, link);
- if (srv->service_id == newsrv->service_id)
- goto out;
- }
-
- /* okay - add the transport to the list */
- list_add_tail(&newsrv->link, &trans->services);
- rxrpc_get_transport(trans);
- ret = 0;
-
- out:
- spin_unlock(&trans->lock);
-
- _leave("= %d", ret);
- return ret;
-} /* end rxrpc_add_service() */
-
-/*****************************************************************************/
-/*
- * remove a service from a transport
- */
-void rxrpc_del_service(struct rxrpc_transport *trans, struct rxrpc_service *srv)
-{
- _enter("%p{%hu},%p{%hu}", trans, trans->port, srv, srv->service_id);
-
- spin_lock(&trans->lock);
- list_del(&srv->link);
- spin_unlock(&trans->lock);
-
- rxrpc_put_transport(trans);
-
- _leave("");
-} /* end rxrpc_del_service() */
-
-/*****************************************************************************/
-/*
- * INET callback when data has been received on the socket.
- */
-static void rxrpc_data_ready(struct sock *sk, int count)
-{
- struct rxrpc_transport *trans;
-
- _enter("%p{t=%p},%d", sk, sk->sk_user_data, count);
-
- /* queue the transport for attention by krxiod */
- trans = (struct rxrpc_transport *) sk->sk_user_data;
- if (trans)
- rxrpc_krxiod_queue_transport(trans);
-
- /* wake up anyone waiting on the socket */
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
-
- _leave("");
-} /* end rxrpc_data_ready() */
-
-/*****************************************************************************/
-/*
- * INET callback when an ICMP error packet is received
- * - sk->err is error (EHOSTUNREACH, EPROTO or EMSGSIZE)
- */
-static void rxrpc_error_report(struct sock *sk)
-{
- struct rxrpc_transport *trans;
-
- _enter("%p{t=%p}", sk, sk->sk_user_data);
-
- /* queue the transport for attention by krxiod */
- trans = (struct rxrpc_transport *) sk->sk_user_data;
- if (trans) {
- trans->error_rcvd = 1;
- rxrpc_krxiod_queue_transport(trans);
- }
-
- /* wake up anyone waiting on the socket */
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
-
- _leave("");
-} /* end rxrpc_error_report() */
-
-/*****************************************************************************/
-/*
- * split a message up, allocating message records and filling them in
- * from the contents of a socket buffer
- */
-static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
- struct sk_buff *pkt,
- struct list_head *msgq)
-{
- struct rxrpc_message *msg;
- int ret;
-
- _enter("");
-
- msg = kzalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
- if (!msg) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&msg->usage, 1);
- list_add_tail(&msg->link,msgq);
-
- /* dig out the Rx routing parameters */
- if (skb_copy_bits(pkt, sizeof(struct udphdr),
- &msg->hdr, sizeof(msg->hdr)) < 0) {
- ret = -EBADMSG;
- goto error;
- }
-
- msg->trans = trans;
- msg->state = RXRPC_MSG_RECEIVED;
- skb_get_timestamp(pkt, &msg->stamp);
- if (msg->stamp.tv_sec == 0) {
- do_gettimeofday(&msg->stamp);
- if (pkt->sk)
- sock_enable_timestamp(pkt->sk);
- }
- msg->seq = ntohl(msg->hdr.seq);
-
- /* attach the packet */
- skb_get(pkt);
- msg->pkt = pkt;
-
- msg->offset = sizeof(struct udphdr) + sizeof(struct rxrpc_header);
- msg->dsize = msg->pkt->len - msg->offset;
-
- _net("Rx Received packet from %s (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
- msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
- ntohl(msg->hdr.epoch),
- (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
- ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
- ntohl(msg->hdr.callNumber),
- rxrpc_pkts[msg->hdr.type],
- msg->hdr.flags,
- ntohs(msg->hdr.serviceId),
- msg->hdr.securityIndex);
-
- __RXACCT(atomic_inc(&rxrpc_message_count));
-
- /* split off jumbo packets */
- while (msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- msg->hdr.flags & RXRPC_JUMBO_PACKET
- ) {
- struct rxrpc_jumbo_header jumbo;
- struct rxrpc_message *jumbomsg = msg;
-
- _debug("split jumbo packet");
-
- /* quick sanity check */
- ret = -EBADMSG;
- if (msg->dsize <
- RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
- goto error;
- if (msg->hdr.flags & RXRPC_LAST_PACKET)
- goto error;
-
- /* dig out the secondary header */
- if (skb_copy_bits(pkt, msg->offset + RXRPC_JUMBO_DATALEN,
- &jumbo, sizeof(jumbo)) < 0)
- goto error;
-
- /* allocate a new message record */
- ret = -ENOMEM;
- msg = kmemdup(jumbomsg, sizeof(struct rxrpc_message), GFP_KERNEL);
- if (!msg)
- goto error;
-
- list_add_tail(&msg->link, msgq);
-
- /* adjust the jumbo packet */
- jumbomsg->dsize = RXRPC_JUMBO_DATALEN;
-
- /* attach the packet here too */
- skb_get(pkt);
-
- /* adjust the parameters */
- msg->seq++;
- msg->hdr.seq = htonl(msg->seq);
- msg->hdr.serial = htonl(ntohl(msg->hdr.serial) + 1);
- msg->offset += RXRPC_JUMBO_DATALEN +
- sizeof(struct rxrpc_jumbo_header);
- msg->dsize -= RXRPC_JUMBO_DATALEN +
- sizeof(struct rxrpc_jumbo_header);
- msg->hdr.flags = jumbo.flags;
- msg->hdr._rsvd = jumbo._rsvd;
-
- _net("Rx Split jumbo packet from %s"
- " (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
- msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
- ntohl(msg->hdr.epoch),
- (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
- ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
- ntohl(msg->hdr.callNumber),
- rxrpc_pkts[msg->hdr.type],
- msg->hdr.flags,
- ntohs(msg->hdr.serviceId),
- msg->hdr.securityIndex);
-
- __RXACCT(atomic_inc(&rxrpc_message_count));
- }
-
- _leave(" = 0 #%d", atomic_read(&rxrpc_message_count));
- return 0;
-
- error:
- while (!list_empty(msgq)) {
- msg = list_entry(msgq->next, struct rxrpc_message, link);
- list_del_init(&msg->link);
-
- rxrpc_put_message(msg);
- }
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_incoming_msg() */
-
-/*****************************************************************************/
-/*
- * accept a new call
- * - called from krxiod in process context
- */
-void rxrpc_trans_receive_packet(struct rxrpc_transport *trans)
-{
- struct rxrpc_message *msg;
- struct rxrpc_peer *peer;
- struct sk_buff *pkt;
- int ret;
- __be32 addr;
- __be16 port;
-
- LIST_HEAD(msgq);
-
- _enter("%p{%d}", trans, trans->port);
-
- for (;;) {
- /* deal with outstanting errors first */
- if (trans->error_rcvd)
- rxrpc_trans_receive_error_report(trans);
-
- /* attempt to receive a packet */
- pkt = skb_recv_datagram(trans->socket->sk, 0, 1, &ret);
- if (!pkt) {
- if (ret == -EAGAIN) {
- _leave(" EAGAIN");
- return;
- }
-
- /* an icmp error may have occurred */
- rxrpc_krxiod_queue_transport(trans);
- _leave(" error %d\n", ret);
- return;
- }
-
- /* we'll probably need to checksum it (didn't call
- * sock_recvmsg) */
- if (skb_checksum_complete(pkt)) {
- kfree_skb(pkt);
- rxrpc_krxiod_queue_transport(trans);
- _leave(" CSUM failed");
- return;
- }
-
- addr = pkt->nh.iph->saddr;
- port = pkt->h.uh->source;
-
- _net("Rx Received UDP packet from %08x:%04hu",
- ntohl(addr), ntohs(port));
-
- /* unmarshall the Rx parameters and split jumbo packets */
- ret = rxrpc_incoming_msg(trans, pkt, &msgq);
- if (ret < 0) {
- kfree_skb(pkt);
- rxrpc_krxiod_queue_transport(trans);
- _leave(" bad packet");
- return;
- }
-
- BUG_ON(list_empty(&msgq));
-
- msg = list_entry(msgq.next, struct rxrpc_message, link);
-
- /* locate the record for the peer from which it
- * originated */
- ret = rxrpc_peer_lookup(trans, addr, &peer);
- if (ret < 0) {
- kdebug("Rx No connections from that peer");
- rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
- goto finished_msg;
- }
-
- /* try and find a matching connection */
- ret = rxrpc_connection_lookup(peer, msg, &msg->conn);
- if (ret < 0) {
- kdebug("Rx Unknown Connection");
- rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
- rxrpc_put_peer(peer);
- goto finished_msg;
- }
- rxrpc_put_peer(peer);
-
- /* deal with the first packet of a new call */
- if (msg->hdr.flags & RXRPC_CLIENT_INITIATED &&
- msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- ntohl(msg->hdr.seq) == 1
- ) {
- _debug("Rx New server call");
- rxrpc_trans_receive_new_call(trans, &msgq);
- goto finished_msg;
- }
-
- /* deal with subsequent packet(s) of call */
- _debug("Rx Call packet");
- while (!list_empty(&msgq)) {
- msg = list_entry(msgq.next, struct rxrpc_message, link);
- list_del_init(&msg->link);
-
- ret = rxrpc_conn_receive_call_packet(msg->conn, NULL, msg);
- if (ret < 0) {
- rxrpc_trans_immediate_abort(trans, msg, ret);
- rxrpc_put_message(msg);
- goto finished_msg;
- }
-
- rxrpc_put_message(msg);
- }
-
- goto finished_msg;
-
- /* dispose of the packets */
- finished_msg:
- while (!list_empty(&msgq)) {
- msg = list_entry(msgq.next, struct rxrpc_message, link);
- list_del_init(&msg->link);
-
- rxrpc_put_message(msg);
- }
- kfree_skb(pkt);
- }
-
- _leave("");
-
-} /* end rxrpc_trans_receive_packet() */
-
-/*****************************************************************************/
-/*
- * accept a new call from a client trying to connect to one of my services
- * - called in process context
- */
-static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
- struct list_head *msgq)
-{
- struct rxrpc_message *msg;
-
- _enter("");
-
- /* only bother with the first packet */
- msg = list_entry(msgq->next, struct rxrpc_message, link);
- list_del_init(&msg->link);
- rxrpc_krxsecd_queue_incoming_call(msg);
- rxrpc_put_message(msg);
-
- _leave(" = 0");
-
- return 0;
-} /* end rxrpc_trans_receive_new_call() */
-
-/*****************************************************************************/
-/*
- * perform an immediate abort without connection or call structures
- */
-int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
- struct rxrpc_message *msg,
- int error)
-{
- struct rxrpc_header ahdr;
- struct sockaddr_in sin;
- struct msghdr msghdr;
- struct kvec iov[2];
- __be32 _error;
- int len, ret;
-
- _enter("%p,%p,%d", trans, msg, error);
-
- /* don't abort an abort packet */
- if (msg->hdr.type == RXRPC_PACKET_TYPE_ABORT) {
- _leave(" = 0");
- return 0;
- }
-
- _error = htonl(-error);
-
- /* set up the message to be transmitted */
- memcpy(&ahdr, &msg->hdr, sizeof(ahdr));
- ahdr.epoch = msg->hdr.epoch;
- ahdr.serial = htonl(1);
- ahdr.seq = 0;
- ahdr.type = RXRPC_PACKET_TYPE_ABORT;
- ahdr.flags = RXRPC_LAST_PACKET;
- ahdr.flags |= ~msg->hdr.flags & RXRPC_CLIENT_INITIATED;
-
- iov[0].iov_len = sizeof(ahdr);
- iov[0].iov_base = &ahdr;
- iov[1].iov_len = sizeof(_error);
- iov[1].iov_base = &_error;
-
- len = sizeof(ahdr) + sizeof(_error);
-
- memset(&sin,0,sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_port = msg->pkt->h.uh->source;
- sin.sin_addr.s_addr = msg->pkt->nh.iph->saddr;
-
- msghdr.msg_name = &sin;
- msghdr.msg_namelen = sizeof(sin);
- msghdr.msg_control = NULL;
- msghdr.msg_controllen = 0;
- msghdr.msg_flags = MSG_DONTWAIT;
-
- _net("Sending message type %d of %d bytes to %08x:%d",
- ahdr.type,
- len,
- ntohl(sin.sin_addr.s_addr),
- ntohs(sin.sin_port));
-
- /* send the message */
- ret = kernel_sendmsg(trans->socket, &msghdr, iov, 2, len);
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_trans_immediate_abort() */
-
-/*****************************************************************************/
-/*
- * receive an ICMP error report and percolate it to all connections
- * heading to the affected host or port
- */
-static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans)
-{
- struct rxrpc_connection *conn;
- struct sockaddr_in sin;
- struct rxrpc_peer *peer;
- struct list_head connq, *_p;
- struct errormsg emsg;
- struct msghdr msg;
- __be16 port;
- int local, err;
-
- _enter("%p", trans);
-
- for (;;) {
- trans->error_rcvd = 0;
-
- /* try and receive an error message */
- msg.msg_name = &sin;
- msg.msg_namelen = sizeof(sin);
- msg.msg_control = &emsg;
- msg.msg_controllen = sizeof(emsg);
- msg.msg_flags = 0;
-
- err = kernel_recvmsg(trans->socket, &msg, NULL, 0, 0,
- MSG_ERRQUEUE | MSG_DONTWAIT | MSG_TRUNC);
-
- if (err == -EAGAIN) {
- _leave("");
- return;
- }
-
- if (err < 0) {
- printk("%s: unable to recv an error report: %d\n",
- __FUNCTION__, err);
- _leave("");
- return;
- }
-
- msg.msg_controllen = (char *) msg.msg_control - (char *) &emsg;
-
- if (msg.msg_controllen < sizeof(emsg.cmsg) ||
- msg.msg_namelen < sizeof(sin)) {
- printk("%s: short control message"
- " (nlen=%u clen=%Zu fl=%x)\n",
- __FUNCTION__,
- msg.msg_namelen,
- msg.msg_controllen,
- msg.msg_flags);
- continue;
- }
-
- _net("Rx Received control message"
- " { len=%Zu level=%u type=%u }",
- emsg.cmsg.cmsg_len,
- emsg.cmsg.cmsg_level,
- emsg.cmsg.cmsg_type);
-
- if (sin.sin_family != AF_INET) {
- printk("Rx Ignoring error report with non-INET address"
- " (fam=%u)",
- sin.sin_family);
- continue;
- }
-
- _net("Rx Received message pertaining to host addr=%x port=%hu",
- ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
-
- if (emsg.cmsg.cmsg_level != SOL_IP ||
- emsg.cmsg.cmsg_type != IP_RECVERR) {
- printk("Rx Ignoring unknown error report"
- " { level=%u type=%u }",
- emsg.cmsg.cmsg_level,
- emsg.cmsg.cmsg_type);
- continue;
- }
-
- if (msg.msg_controllen < sizeof(emsg.cmsg) + sizeof(emsg.ee)) {
- printk("%s: short error message (%Zu)\n",
- __FUNCTION__, msg.msg_controllen);
- _leave("");
- return;
- }
-
- port = sin.sin_port;
-
- switch (emsg.ee.ee_origin) {
- case SO_EE_ORIGIN_ICMP:
- local = 0;
- switch (emsg.ee.ee_type) {
- case ICMP_DEST_UNREACH:
- switch (emsg.ee.ee_code) {
- case ICMP_NET_UNREACH:
- _net("Rx Received ICMP Network Unreachable");
- port = 0;
- err = -ENETUNREACH;
- break;
- case ICMP_HOST_UNREACH:
- _net("Rx Received ICMP Host Unreachable");
- port = 0;
- err = -EHOSTUNREACH;
- break;
- case ICMP_PORT_UNREACH:
- _net("Rx Received ICMP Port Unreachable");
- err = -ECONNREFUSED;
- break;
- case ICMP_NET_UNKNOWN:
- _net("Rx Received ICMP Unknown Network");
- port = 0;
- err = -ENETUNREACH;
- break;
- case ICMP_HOST_UNKNOWN:
- _net("Rx Received ICMP Unknown Host");
- port = 0;
- err = -EHOSTUNREACH;
- break;
- default:
- _net("Rx Received ICMP DestUnreach { code=%u }",
- emsg.ee.ee_code);
- err = emsg.ee.ee_errno;
- break;
- }
- break;
-
- case ICMP_TIME_EXCEEDED:
- _net("Rx Received ICMP TTL Exceeded");
- err = emsg.ee.ee_errno;
- break;
-
- default:
- _proto("Rx Received ICMP error { type=%u code=%u }",
- emsg.ee.ee_type, emsg.ee.ee_code);
- err = emsg.ee.ee_errno;
- break;
- }
- break;
-
- case SO_EE_ORIGIN_LOCAL:
- _proto("Rx Received local error { error=%d }",
- emsg.ee.ee_errno);
- local = 1;
- err = emsg.ee.ee_errno;
- break;
-
- case SO_EE_ORIGIN_NONE:
- case SO_EE_ORIGIN_ICMP6:
- default:
- _proto("Rx Received error report { orig=%u }",
- emsg.ee.ee_origin);
- local = 0;
- err = emsg.ee.ee_errno;
- break;
- }
-
- /* find all the connections between this transport and the
- * affected destination */
- INIT_LIST_HEAD(&connq);
-
- if (rxrpc_peer_lookup(trans, sin.sin_addr.s_addr,
- &peer) == 0) {
- read_lock(&peer->conn_lock);
- list_for_each(_p, &peer->conn_active) {
- conn = list_entry(_p, struct rxrpc_connection,
- link);
- if (port && conn->addr.sin_port != port)
- continue;
- if (!list_empty(&conn->err_link))
- continue;
-
- rxrpc_get_connection(conn);
- list_add_tail(&conn->err_link, &connq);
- }
- read_unlock(&peer->conn_lock);
-
- /* service all those connections */
- while (!list_empty(&connq)) {
- conn = list_entry(connq.next,
- struct rxrpc_connection,
- err_link);
- list_del(&conn->err_link);
-
- rxrpc_conn_handle_error(conn, local, err);
-
- rxrpc_put_connection(conn);
- }
-
- rxrpc_put_peer(peer);
- }
- }
-
- _leave("");
- return;
-} /* end rxrpc_trans_receive_error_report() */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f4544dd86476..475df8449be9 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -46,62 +46,6 @@ config NET_SCH_FIFO
if NET_SCHED
-choice
- prompt "Packet scheduler clock source"
- default NET_SCH_CLK_GETTIMEOFDAY
- ---help---
- Packet schedulers need a monotonic clock that increments at a static
- rate. The kernel provides several suitable interfaces, each with
- different properties:
-
- - high resolution (us or better)
- - fast to read (minimal locking, no i/o access)
- - synchronized on all processors
- - handles cpu clock frequency changes
-
- but nothing provides all of the above.
-
-config NET_SCH_CLK_JIFFIES
- bool "Timer interrupt"
- ---help---
- Say Y here if you want to use the timer interrupt (jiffies) as clock
- source. This clock source is fast, synchronized on all processors and
- handles cpu clock frequency changes, but its resolution is too low
- for accurate shaping except at very low speed.
-
-config NET_SCH_CLK_GETTIMEOFDAY
- bool "gettimeofday"
- ---help---
- Say Y here if you want to use gettimeofday as clock source. This clock
- source has high resolution, is synchronized on all processors and
- handles cpu clock frequency changes, but it is slow.
-
- Choose this if you need a high resolution clock source but can't use
- the CPU's cycle counter.
-
-# don't allow on SMP x86 because they can have unsynchronized TSCs.
-# gettimeofday is a good alternative
-config NET_SCH_CLK_CPU
- bool "CPU cycle counter"
- depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64
- ---help---
- Say Y here if you want to use the CPU's cycle counter as clock source.
- This is a cheap and high resolution clock source, but on some
- architectures it is not synchronized on all processors and doesn't
- handle cpu clock frequency changes.
-
- The useable cycle counters are:
-
- x86/x86_64 - Timestamp Counter
- alpha - Cycle Counter
- sparc64 - %ticks register
- ppc64 - Time base
- ia64 - Interval Time Counter
-
- Choose this if your CPU's cycle counter is working properly.
-
-endchoice
-
comment "Queueing/Scheduling"
config NET_SCH_CBQ
diff --git a/net/sched/Makefile b/net/sched/Makefile
index ff2d6e5e282c..020767a204d4 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -17,7 +17,6 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
-obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cb21617a5670..711dd26c95c3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -25,12 +25,12 @@
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <net/sock.h>
#include <net/sch_generic.h>
#include <net/act_api.h>
+#include <net/netlink.h>
void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
{
@@ -93,15 +93,15 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
continue;
a->priv = p;
a->order = n_i;
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, a->order, 0, NULL);
err = tcf_action_dump_1(skb, a, 0, 0);
if (err < 0) {
index--;
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
goto done;
}
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
n_i++;
if (n_i >= TCA_ACT_MAX_PRIO)
goto done;
@@ -114,7 +114,7 @@ done:
return n_i;
rtattr_failure:
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
goto done;
}
@@ -125,7 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
struct rtattr *r ;
int i= 0, n_i = 0;
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, a->order, 0, NULL);
RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
for (i = 0; i < (hinfo->hmask + 1); i++) {
@@ -140,11 +140,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
}
}
RTA_PUT(skb, TCA_FCNT, 4, &n_i);
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
return n_i;
rtattr_failure:
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
return -EINVAL;
}
@@ -423,7 +423,7 @@ int
tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
int err = -EINVAL;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *r;
if (a->ops == NULL || a->ops->dump == NULL)
@@ -432,15 +432,15 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
if (tcf_action_copy_stats(skb, a, 0))
goto rtattr_failure;
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
return err;
}
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -449,17 +449,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
{
struct tc_action *a;
int err = -EINVAL;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *r ;
while ((a = act) != NULL) {
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
act = a->next;
RTA_PUT(skb, a->order, 0, NULL);
err = tcf_action_dump_1(skb, a, bind, ref);
if (err < 0)
goto errout;
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
}
return 0;
@@ -467,7 +467,7 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
rtattr_failure:
err = -EINVAL;
errout:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return err;
}
@@ -635,7 +635,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
{
struct tcamsg *t;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *x;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
@@ -645,20 +645,20 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
t->tca__pad1 = 0;
t->tca__pad2 = 0;
- x = (struct rtattr*) skb->tail;
+ x = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
if (tcf_action_dump(skb, a, bind, ref) < 0)
goto rtattr_failure;
- x->rta_len = skb->tail - (u8*)x;
+ x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -767,7 +767,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
return -ENOBUFS;
}
- b = (unsigned char *)skb->tail;
+ b = skb_tail_pointer(skb);
if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
goto err_out;
@@ -783,16 +783,16 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
t->tca__pad1 = 0;
t->tca__pad2 = 0;
- x = (struct rtattr *) skb->tail;
+ x = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
if (err < 0)
goto rtattr_failure;
- x->rta_len = skb->tail - (u8 *) x;
+ x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
nlh->nlmsg_flags |= NLM_F_ROOT;
module_put(a->ops->owner);
kfree(a);
@@ -884,7 +884,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
if (!skb)
return -ENOBUFS;
- b = (unsigned char *)skb->tail;
+ b = skb_tail_pointer(skb);
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
t = NLMSG_DATA(nlh);
@@ -892,15 +892,15 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
t->tca__pad1 = 0;
t->tca__pad2 = 0;
- x = (struct rtattr*) skb->tail;
+ x = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
if (tcf_action_dump(skb, a, 0, 0) < 0)
goto rtattr_failure;
- x->rta_len = skb->tail - (u8*)x;
+ x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
NETLINK_CB(skb).dst_group = RTNLGRP_TC;
err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
@@ -1015,7 +1015,7 @@ static int
tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *x;
struct tc_action_ops *a_o;
struct tc_action a;
@@ -1048,7 +1048,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
t->tca__pad1 = 0;
t->tca__pad2 = 0;
- x = (struct rtattr *) skb->tail;
+ x = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
@@ -1056,12 +1056,12 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
goto rtattr_failure;
if (ret > 0) {
- x->rta_len = skb->tail - (u8 *) x;
+ x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
ret = skb->len;
} else
- skb_trim(skb, (u8*)x - skb->data);
+ nlmsg_trim(skb, x);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
if (NETLINK_CB(cb->skb).pid && ret)
nlh->nlmsg_flags |= NLM_F_MULTI;
module_put(a_o->owner);
@@ -1070,20 +1070,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
rtattr_failure:
nlmsg_failure:
module_put(a_o->owner);
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return skb->len;
}
static int __init tc_action_init(void)
{
- struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
-
- if (link_p) {
- link_p[RTM_NEWACTION-RTM_BASE].doit = tc_ctl_action;
- link_p[RTM_DELACTION-RTM_BASE].doit = tc_ctl_action;
- link_p[RTM_GETACTION-RTM_BASE].doit = tc_ctl_action;
- link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
- }
+ rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action);
return 0;
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 87d0faf32867..7517f3791541 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -28,6 +28,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_gact.h>
@@ -155,7 +156,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_gact opt;
struct tcf_gact *gact = a->priv;
struct tcf_t t;
@@ -181,7 +182,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 47f0b1324239..00b05f422d45 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,6 +30,7 @@
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/kmod.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_ipt.h>
@@ -245,7 +246,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_ipt *ipt = a->priv;
struct ipt_entry_target *t;
struct tcf_t tm;
@@ -277,7 +278,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
kfree(t);
return -1;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 68f26cb278f9..de21c92faaa2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_mirred.h>
@@ -198,7 +199,7 @@ bad_mirred:
skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
skb2->dev = dev;
- skb2->input_dev = skb->dev;
+ skb2->iif = skb->dev->ifindex;
dev_queue_xmit(skb2);
spin_unlock(&m->tcf_lock);
return m->tcf_action;
@@ -206,7 +207,7 @@ bad_mirred:
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_mirred *m = a->priv;
struct tc_mirred opt;
struct tcf_t t;
@@ -225,7 +226,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3d6a2fcc9ce4..45b3cda86a21 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -27,6 +27,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_pedit.h>
@@ -136,7 +137,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
}
}
- pptr = skb->nh.raw;
+ pptr = skb_network_header(skb);
spin_lock(&p->tcf_lock);
@@ -195,7 +196,7 @@ done:
static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_pedit *p = a->priv;
struct tc_pedit *opt;
struct tcf_t t;
@@ -226,7 +227,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
kfree(opt);
return -1;
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 10a5a5c36f76..616f465f407e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -30,6 +30,7 @@
#include <linux/init.h>
#include <net/sock.h>
#include <net/act_api.h>
+#include <net/netlink.h>
#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
@@ -80,7 +81,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
continue;
a->priv = p;
a->order = index;
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, a->order, 0, NULL);
if (type == RTM_DELACTION)
err = tcf_action_dump_1(skb, a, 0, 1);
@@ -88,10 +89,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
err = tcf_action_dump_1(skb, a, 0, 0);
if (err < 0) {
index--;
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
goto done;
}
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
n_i++;
}
}
@@ -102,7 +103,7 @@ done:
return n_i;
rtattr_failure:
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
goto done;
}
#endif
@@ -240,7 +241,7 @@ override:
if (ret != ACT_P_CREATED)
return ret;
- PSCHED_GET_TIME(police->tcfp_t_c);
+ police->tcfp_t_c = psched_get_time();
police->tcf_index = parm->index ? parm->index :
tcf_hash_new_index(&police_idx_gen, &police_hash_info);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
@@ -295,10 +296,9 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
return police->tcfp_result;
}
- PSCHED_GET_TIME(now);
-
- toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
- police->tcfp_burst);
+ now = psched_get_time();
+ toks = psched_tdiff_bounded(now, police->tcfp_t_c,
+ police->tcfp_burst);
if (police->tcfp_P_tab) {
ptoks = toks + police->tcfp_ptoks;
if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
@@ -326,7 +326,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
static int
tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_police *police = a->priv;
struct tc_police opt;
@@ -355,7 +355,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -494,7 +494,7 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
}
if (police->tcfp_P_tab)
police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
- PSCHED_GET_TIME(police->tcfp_t_c);
+ police->tcfp_t_c = psched_get_time();
police->tcf_index = parm->index ? parm->index :
tcf_police_new_index();
police->tcf_action = parm->action;
@@ -542,9 +542,9 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police)
return police->tcfp_result;
}
- PSCHED_GET_TIME(now);
- toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
- police->tcfp_burst);
+ now = psched_get_time();
+ toks = psched_tdiff_bounded(now, police->tcfp_t_c,
+ police->tcfp_burst);
if (police->tcfp_P_tab) {
ptoks = toks + police->tcfp_ptoks;
if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
@@ -572,7 +572,7 @@ EXPORT_SYMBOL(tcf_police);
int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_police opt;
opt.index = police->tcf_index;
@@ -598,7 +598,7 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index c7971182af07..36e1edad5990 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -16,6 +16,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#define TCA_ACT_SIMP 22
@@ -155,7 +156,7 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_defact *d = a->priv;
struct tc_defact opt;
struct tcf_t t;
@@ -173,7 +174,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5c6ffdb77d2d..ebf94edf0478 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -29,9 +29,10 @@
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/kmod.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -323,7 +324,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
@@ -340,12 +341,12 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
goto rtattr_failure;
}
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -399,7 +400,6 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
return skb->len;
- read_lock(&qdisc_tree_lock);
if (!tcm->tcm_parent)
q = dev->qdisc_sleeping;
else
@@ -456,7 +456,6 @@ errout:
if (cl)
cops->put(q, cl);
out:
- read_unlock(&qdisc_tree_lock);
dev_put(dev);
return skb->len;
}
@@ -563,30 +562,30 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
* to work with both old and new modes of entering
* tc data even if iproute2 was newer - jhs
*/
- struct rtattr * p_rta = (struct rtattr*) skb->tail;
+ struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
if (exts->action->type != TCA_OLD_COMPAT) {
RTA_PUT(skb, map->action, 0, NULL);
if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
goto rtattr_failure;
- p_rta->rta_len = skb->tail - (u8*)p_rta;
+ p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
} else if (map->police) {
RTA_PUT(skb, map->police, 0, NULL);
if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
goto rtattr_failure;
- p_rta->rta_len = skb->tail - (u8*)p_rta;
+ p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
}
}
#elif defined CONFIG_NET_CLS_POLICE
if (map->police && exts->police) {
- struct rtattr * p_rta = (struct rtattr*) skb->tail;
+ struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, map->police, 0, NULL);
if (tcf_police_dump(skb, exts->police) < 0)
goto rtattr_failure;
- p_rta->rta_len = skb->tail - (u8*)p_rta;
+ p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
}
#endif
return 0;
@@ -614,18 +613,11 @@ rtattr_failure: __attribute__ ((unused))
static int __init tc_filter_init(void)
{
- struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
+ rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
+ tc_dump_tfilter);
- /* Setup rtnetlink links. It is made here to avoid
- exporting large number of public symbols.
- */
-
- if (link_p) {
- link_p[RTM_NEWTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
- link_p[RTM_DELTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
- link_p[RTM_GETTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
- link_p[RTM_GETTFILTER-RTM_BASE].dumpit = tc_dump_tfilter;
- }
return 0;
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index fad08e521c24..c885412d79d5 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
@@ -81,6 +82,13 @@ static void basic_put(struct tcf_proto *tp, unsigned long f)
static int basic_init(struct tcf_proto *tp)
{
+ struct basic_head *head;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+ INIT_LIST_HEAD(&head->flist);
+ tp->root = head;
return 0;
}
@@ -102,6 +110,7 @@ static void basic_destroy(struct tcf_proto *tp)
list_del(&f->link);
basic_delete_filter(tp, f);
}
+ kfree(head);
}
static int basic_delete(struct tcf_proto *tp, unsigned long arg)
@@ -176,15 +185,6 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
}
err = -ENOBUFS;
- if (head == NULL) {
- head = kzalloc(sizeof(*head), GFP_KERNEL);
- if (head == NULL)
- goto errout;
-
- INIT_LIST_HEAD(&head->flist);
- tp->root = head;
- }
-
f = kzalloc(sizeof(*f), GFP_KERNEL);
if (f == NULL)
goto errout;
@@ -246,7 +246,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct basic_filter *f = (struct basic_filter *) fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
if (f == NULL)
@@ -264,11 +264,11 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
goto rtattr_failure;
- rta->rta_len = (skb->tail - b);
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 5dbb9d451f73..bbec4a0d4dcb 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -38,6 +38,7 @@
#include <linux/notifier.h>
#include <linux/netfilter.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -348,7 +349,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
{
struct fw_head *head = (struct fw_head *)tp->root;
struct fw_filter *f = (struct fw_filter*)fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
if (f == NULL)
@@ -374,7 +375,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
goto rtattr_failure;
@@ -382,7 +383,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index e85df07d8ce7..cc941d0ee3a5 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -28,6 +28,7 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -88,12 +89,12 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif)
static inline
void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id)
{
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
memset(head->fastmap, 0, sizeof(head->fastmap));
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
-static void __inline__
+static inline void
route4_set_fastmap(struct route4_head *head, u32 id, int iif,
struct route4_filter *f)
{
@@ -562,7 +563,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct route4_filter *f = (struct route4_filter*)fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
u32 id;
@@ -591,7 +592,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
goto rtattr_failure;
@@ -599,7 +600,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index 1d4a1fb17608..0a683c07c648 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -31,6 +31,7 @@
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
+#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 7853621a04cc..22f9ede70e8f 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -143,9 +143,9 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
u8 tunnelid = 0;
u8 *xprt;
#if RSVP_DST_LEN == 4
- struct ipv6hdr *nhptr = skb->nh.ipv6h;
+ struct ipv6hdr *nhptr = ipv6_hdr(skb);
#else
- struct iphdr *nhptr = skb->nh.iph;
+ struct iphdr *nhptr = ip_hdr(skb);
#endif
restart:
@@ -160,7 +160,7 @@ restart:
dst = &nhptr->daddr;
protocol = nhptr->protocol;
xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
- if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
+ if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
return -1;
#endif
@@ -593,7 +593,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
{
struct rsvp_filter *f = (struct rsvp_filter*)fh;
struct rsvp_session *s;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
struct tc_rsvp_pinfo pinfo;
@@ -623,14 +623,14 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
goto rtattr_failure;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index a2979d89798f..93b6abed57db 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -34,6 +34,7 @@
#include <net/sock.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/netlink.h>
#define RSVP_DST_LEN 4
#define RSVP_ID "rsvp6"
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 040e2d2d281a..47ac0c556429 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -12,6 +12,7 @@
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/act_api.h>
+#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/route.h>
@@ -245,9 +246,9 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
}
if (tb[TCA_TCINDEX_SHIFT-1]) {
- if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(u16))
+ if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(int))
goto errout;
- cp.shift = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
+ cp.shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
}
err = -EBUSY;
@@ -448,7 +449,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
{
struct tcindex_data *p = PRIV(tp);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
@@ -463,7 +464,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
&p->fall_through);
- rta->rta_len = skb->tail-b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
} else {
if (p->perfect) {
t->tcm_handle = r-p->perfect;
@@ -486,7 +487,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail-b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
goto rtattr_failure;
@@ -495,7 +496,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 0bcb16928d25..c7a347bd6d70 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -50,6 +50,7 @@
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -119,7 +120,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
} stack[TC_U32_MAXDEPTH];
struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
- u8 *ptr = skb->nh.raw;
+ u8 *ptr = skb_network_header(skb);
struct tc_u_knode *n;
int sdepth = 0;
int off2 = 0;
@@ -213,7 +214,7 @@ check_terminal:
off2 = 0;
}
- if (ptr < skb->tail)
+ if (ptr < skb_tail_pointer(skb))
goto next_ht;
}
@@ -435,7 +436,7 @@ static void u32_destroy(struct tcf_proto *tp)
BUG_TRAP(ht->refcnt == 0);
kfree(ht);
- };
+ }
kfree(tp_c);
}
@@ -718,7 +719,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct tc_u_knode *n = (struct tc_u_knode*)fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
if (n == NULL)
@@ -765,14 +766,14 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
#endif
}
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (TC_U32_KEY(n->handle))
if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
goto rtattr_failure;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index cd0600c67969..0a2a7fe08de3 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -22,7 +22,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
struct tcf_pkt_info *info)
{
struct tc_u32_key *key = (struct tc_u32_key *) em->data;
- unsigned char *ptr = skb->nh.raw;
+ const unsigned char *ptr = skb_network_header(skb);
if (info) {
if (info->ptr)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 959c306c5714..63146d339d81 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -418,17 +418,19 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
{
int i;
- struct rtattr * top_start = (struct rtattr*) skb->tail;
- struct rtattr * list_start;
+ u8 *tail;
+ struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb);
+ struct rtattr *list_start;
RTA_PUT(skb, tlv, 0, NULL);
RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
- list_start = (struct rtattr *) skb->tail;
+ list_start = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
+ tail = skb_tail_pointer(skb);
for (i = 0; i < tree->hdr.nmatches; i++) {
- struct rtattr *match_start = (struct rtattr*) skb->tail;
+ struct rtattr *match_start = (struct rtattr *)tail;
struct tcf_ematch *em = tcf_em_get_match(tree, i);
struct tcf_ematch_hdr em_hdr = {
.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
@@ -447,11 +449,12 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
} else if (em->datalen > 0)
RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
- match_start->rta_len = skb->tail - (u8*) match_start;
+ tail = skb_tail_pointer(skb);
+ match_start->rta_len = tail - (u8 *)match_start;
}
- list_start->rta_len = skb->tail - (u8 *) list_start;
- top_start->rta_len = skb->tail - (u8 *) top_start;
+ list_start->rta_len = tail - (u8 *)list_start;
+ top_start->rta_len = tail - (u8 *)top_start;
return 0;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ecc988af4a9a..bec600af03ca 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -27,14 +27,15 @@
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/bitops.h>
+#include <linux/hrtimer.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
@@ -190,7 +191,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
(root qdisc, all its children, children of children etc.)
*/
-static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
@@ -201,16 +202,6 @@ static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
return NULL;
}
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
-{
- struct Qdisc *q;
-
- read_lock(&qdisc_tree_lock);
- q = __qdisc_lookup(dev, handle);
- read_unlock(&qdisc_tree_lock);
- return q;
-}
-
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
@@ -291,6 +282,48 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
}
}
+static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
+{
+ struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
+ timer);
+ struct net_device *dev = wd->qdisc->dev;
+
+ wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+ smp_wmb();
+ if (spin_trylock(&dev->queue_lock)) {
+ qdisc_run(dev);
+ spin_unlock(&dev->queue_lock);
+ } else
+ netif_schedule(dev);
+
+ return HRTIMER_NORESTART;
+}
+
+void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+{
+ hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ wd->timer.function = qdisc_watchdog;
+ wd->qdisc = qdisc;
+}
+EXPORT_SYMBOL(qdisc_watchdog_init);
+
+void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
+{
+ ktime_t time;
+
+ wd->qdisc->flags |= TCQ_F_THROTTLED;
+ time = ktime_set(0, 0);
+ time = ktime_add_ns(time, PSCHED_US2NS(expires));
+ hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
+}
+EXPORT_SYMBOL(qdisc_watchdog_schedule);
+
+void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
+{
+ hrtimer_cancel(&wd->timer);
+ wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+}
+EXPORT_SYMBOL(qdisc_watchdog_cancel);
/* Allocate an unique handle from space managed by kernel */
@@ -362,7 +395,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
if (n == 0)
return;
while ((parentid = sch->parent)) {
- sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+ sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
cl = cops->get(sch, parentid);
@@ -467,12 +500,16 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
if (handle == TC_H_INGRESS) {
sch->flags |= TCQ_F_INGRESS;
+ sch->stats_lock = &dev->ingress_lock;
handle = TC_H_MAKE(TC_H_INGRESS, 0);
- } else if (handle == 0) {
- handle = qdisc_alloc_handle(dev);
- err = -ENOMEM;
- if (handle == 0)
- goto err_out3;
+ } else {
+ sch->stats_lock = &dev->queue_lock;
+ if (handle == 0) {
+ handle = qdisc_alloc_handle(dev);
+ err = -ENOMEM;
+ if (handle == 0)
+ goto err_out3;
+ }
}
sch->handle = handle;
@@ -621,9 +658,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
return err;
if (q) {
qdisc_notify(skb, n, clid, q, NULL);
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc_destroy(q);
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
} else {
qdisc_notify(skb, n, clid, NULL, q);
@@ -756,17 +793,17 @@ graft:
err = qdisc_graft(dev, p, clid, q, &old_q);
if (err) {
if (q) {
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc_destroy(q);
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
return err;
}
qdisc_notify(skb, n, clid, old_q, q);
if (old_q) {
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc_destroy(old_q);
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
}
return 0;
@@ -777,7 +814,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct gnet_dump d;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
@@ -811,12 +848,12 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
if (gnet_stats_finish_copy(&d) < 0)
goto rtattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -857,12 +894,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = cb->args[0];
s_q_idx = q_idx = cb->args[1];
read_lock(&dev_base_lock);
- for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if (idx > s_idx)
s_q_idx = 0;
- read_lock(&qdisc_tree_lock);
q_idx = 0;
list_for_each_entry(q, &dev->qdisc_list, list) {
if (q_idx < s_q_idx) {
@@ -870,13 +907,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
- read_unlock(&qdisc_tree_lock);
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
- }
q_idx++;
}
- read_unlock(&qdisc_tree_lock);
+cont:
+ idx++;
}
done:
@@ -1015,7 +1051,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct gnet_dump d;
struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
@@ -1040,12 +1076,12 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
if (gnet_stats_finish_copy(&d) < 0)
goto rtattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1099,7 +1135,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- read_lock(&qdisc_tree_lock);
list_for_each_entry(q, &dev->qdisc_list, list) {
if (t < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
@@ -1121,7 +1156,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
break;
t++;
}
- read_unlock(&qdisc_tree_lock);
cb->args[0] = t;
@@ -1146,7 +1180,7 @@ reclassify:
for ( ; tp; tp = tp->next) {
if ((tp->protocol == protocol ||
- tp->protocol == __constant_htons(ETH_P_ALL)) &&
+ tp->protocol == htons(ETH_P_ALL)) &&
(err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
if ( TC_ACT_RECLASSIFY == err) {
@@ -1175,15 +1209,31 @@ reclassify:
return -1;
}
-static int psched_us_per_tick = 1;
-static int psched_tick_per_us = 1;
+void tcf_destroy(struct tcf_proto *tp)
+{
+ tp->ops->destroy(tp);
+ module_put(tp->ops->owner);
+ kfree(tp);
+}
+
+void tcf_destroy_chain(struct tcf_proto *fl)
+{
+ struct tcf_proto *tp;
+
+ while ((tp = fl) != NULL) {
+ fl = tp->next;
+ tcf_destroy(tp);
+ }
+}
+EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
seq_printf(seq, "%08x %08x %08x %08x\n",
- psched_tick_per_us, psched_us_per_tick,
- 1000000, HZ);
+ (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
+ 1000000,
+ (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));
return 0;
}
@@ -1202,101 +1252,19 @@ static const struct file_operations psched_fops = {
};
#endif
-#ifdef CONFIG_NET_SCH_CLK_CPU
-psched_tdiff_t psched_clock_per_hz;
-int psched_clock_scale;
-EXPORT_SYMBOL(psched_clock_per_hz);
-EXPORT_SYMBOL(psched_clock_scale);
-
-psched_time_t psched_time_base;
-cycles_t psched_time_mark;
-EXPORT_SYMBOL(psched_time_mark);
-EXPORT_SYMBOL(psched_time_base);
-
-/*
- * Periodically adjust psched_time_base to avoid overflow
- * with 32-bit get_cycles(). Safe up to 4GHz CPU.
- */
-static void psched_tick(unsigned long);
-static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
-
-static void psched_tick(unsigned long dummy)
-{
- if (sizeof(cycles_t) == sizeof(u32)) {
- psched_time_t dummy_stamp;
- PSCHED_GET_TIME(dummy_stamp);
- psched_timer.expires = jiffies + 1*HZ;
- add_timer(&psched_timer);
- }
-}
-
-int __init psched_calibrate_clock(void)
-{
- psched_time_t stamp, stamp1;
- struct timeval tv, tv1;
- psched_tdiff_t delay;
- long rdelay;
- unsigned long stop;
-
- psched_tick(0);
- stop = jiffies + HZ/10;
- PSCHED_GET_TIME(stamp);
- do_gettimeofday(&tv);
- while (time_before(jiffies, stop)) {
- barrier();
- cpu_relax();
- }
- PSCHED_GET_TIME(stamp1);
- do_gettimeofday(&tv1);
-
- delay = PSCHED_TDIFF(stamp1, stamp);
- rdelay = tv1.tv_usec - tv.tv_usec;
- rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
- if (rdelay > delay)
- return -1;
- delay /= rdelay;
- psched_tick_per_us = delay;
- while ((delay>>=1) != 0)
- psched_clock_scale++;
- psched_us_per_tick = 1<<psched_clock_scale;
- psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
- return 0;
-}
-#endif
-
static int __init pktsched_init(void)
{
- struct rtnetlink_link *link_p;
-
-#ifdef CONFIG_NET_SCH_CLK_CPU
- if (psched_calibrate_clock() < 0)
- return -1;
-#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
- psched_tick_per_us = HZ<<PSCHED_JSCALE;
- psched_us_per_tick = 1000000;
-#endif
-
- link_p = rtnetlink_links[PF_UNSPEC];
-
- /* Setup rtnetlink links. It is made here to avoid
- exporting large number of public symbols.
- */
-
- if (link_p) {
- link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
- link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
- link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
- link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
- link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
- link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
- link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
- link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
- }
-
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
proc_net_fops_create("psched", 0, &psched_fops);
+ rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
+ rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
+
return 0;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index afb3bbd571f2..be7d299acd73 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -14,6 +14,7 @@
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/file.h> /* for fput */
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -157,19 +158,6 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
return atm_tc_get(sch,classid);
}
-
-static void destroy_filters(struct atm_flow_data *flow)
-{
- struct tcf_proto *filter;
-
- while ((filter = flow->filter_list)) {
- DPRINTK("destroy_filters: destroying filter %p\n",filter);
- flow->filter_list = filter->next;
- tcf_destroy(filter);
- }
-}
-
-
/*
* atm_tc_put handles all destructions, including the ones that are explicitly
* requested (atm_tc_destroy, etc.). The assumption here is that we never drop
@@ -194,7 +182,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
*prev = flow->next;
DPRINTK("atm_tc_put: qdisc %p\n",flow->q);
qdisc_destroy(flow->q);
- destroy_filters(flow);
+ tcf_destroy_chain(flow->filter_list);
if (flow->sock) {
DPRINTK("atm_tc_put: f_count %d\n",
file_count(flow->sock->file));
@@ -503,7 +491,7 @@ static void sch_atm_dequeue(unsigned long data)
}
D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow);
/* remove any LL header somebody else has attached */
- skb_pull(skb,(char *) skb->nh.iph-(char *) skb->data);
+ skb_pull(skb, skb_network_offset(skb));
if (skb_headroom(skb) < flow->hdr_len) {
struct sk_buff *new;
@@ -513,7 +501,7 @@ static void sch_atm_dequeue(unsigned long data)
skb = new;
}
D2PRINTK("sch_atm_dequeue: ip %p, data %p\n",
- skb->nh.iph,skb->data);
+ skb_network_header(skb), skb->data);
ATM_SKB(skb)->vcc = flow->vcc;
memcpy(skb_push(skb,flow->hdr_len),flow->hdr,
flow->hdr_len);
@@ -610,7 +598,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p);
/* races ? */
while ((flow = p->flows)) {
- destroy_filters(flow);
+ tcf_destroy_chain(flow->filter_list);
if (flow->ref > 1)
printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow,
flow->ref);
@@ -631,7 +619,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
{
struct atm_qdisc_data *p = PRIV(sch);
struct atm_flow_data *flow = (struct atm_flow_data *) cl;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
@@ -661,11 +649,11 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero);
}
- rta->rta_len = skb->tail-b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb,b-skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static int
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 76c92e710a33..a294542cb8e4 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -29,6 +29,7 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -112,7 +113,7 @@ struct cbq_class
/* Overlimit strategy parameters */
void (*overlimit)(struct cbq_class *cl);
- long penalty;
+ psched_tdiff_t penalty;
/* General scheduler (WRR) parameters */
long allot;
@@ -143,7 +144,7 @@ struct cbq_class
psched_time_t undertime;
long avgidle;
long deficit; /* Saved deficit for WRR */
- unsigned long penalized;
+ psched_time_t penalized;
struct gnet_stats_basic bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est rate_est;
@@ -180,12 +181,12 @@ struct cbq_sched_data
psched_time_t now_rt; /* Cached real time */
unsigned pmask;
- struct timer_list delay_timer;
- struct timer_list wd_timer; /* Watchdog timer,
+ struct hrtimer delay_timer;
+ struct qdisc_watchdog watchdog; /* Watchdog timer,
started when CBQ has
backlog, but cannot
transmit just now */
- long wd_expires;
+ psched_tdiff_t wd_expires;
int toplevel;
u32 hgenerator;
};
@@ -384,12 +385,12 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
psched_time_t now;
psched_tdiff_t incr;
- PSCHED_GET_TIME(now);
- incr = PSCHED_TDIFF(now, q->now_rt);
- PSCHED_TADD2(q->now, incr, now);
+ now = psched_get_time();
+ incr = now - q->now_rt;
+ now = q->now + incr;
do {
- if (PSCHED_TLESS(cl->undertime, now)) {
+ if (cl->undertime < now) {
q->toplevel = cl->level;
return;
}
@@ -473,7 +474,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
static void cbq_ovl_classic(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+ psched_tdiff_t delay = cl->undertime - q->now;
if (!cl->delayed) {
delay += cl->offtime;
@@ -491,7 +492,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
cl->avgidle = cl->minidle;
if (delay <= 0)
delay = 1;
- PSCHED_TADD2(q->now, delay, cl->undertime);
+ cl->undertime = q->now + delay;
cl->xstats.overactions++;
cl->delayed = 1;
@@ -508,7 +509,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
psched_tdiff_t base_delay = q->wd_expires;
for (b = cl->borrow; b; b = b->borrow) {
- delay = PSCHED_TDIFF(b->undertime, q->now);
+ delay = b->undertime - q->now;
if (delay < base_delay) {
if (delay <= 0)
delay = 1;
@@ -546,27 +547,32 @@ static void cbq_ovl_rclassic(struct cbq_class *cl)
static void cbq_ovl_delay(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+ psched_tdiff_t delay = cl->undertime - q->now;
if (!cl->delayed) {
- unsigned long sched = jiffies;
+ psched_time_t sched = q->now;
+ ktime_t expires;
delay += cl->offtime;
if (cl->avgidle < 0)
delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
if (cl->avgidle < cl->minidle)
cl->avgidle = cl->minidle;
- PSCHED_TADD2(q->now, delay, cl->undertime);
+ cl->undertime = q->now + delay;
if (delay > 0) {
- sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
+ sched += delay + cl->penalty;
cl->penalized = sched;
cl->cpriority = TC_CBQ_MAXPRIO;
q->pmask |= (1<<TC_CBQ_MAXPRIO);
- if (del_timer(&q->delay_timer) &&
- (long)(q->delay_timer.expires - sched) > 0)
- q->delay_timer.expires = sched;
- add_timer(&q->delay_timer);
+
+ expires = ktime_set(0, 0);
+ expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
+ if (hrtimer_try_to_cancel(&q->delay_timer) &&
+ ktime_to_ns(ktime_sub(q->delay_timer.expires,
+ expires)) > 0)
+ q->delay_timer.expires = expires;
+ hrtimer_restart(&q->delay_timer);
cl->delayed = 1;
cl->xstats.overactions++;
return;
@@ -583,7 +589,7 @@ static void cbq_ovl_lowprio(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- cl->penalized = jiffies + cl->penalty;
+ cl->penalized = q->now + cl->penalty;
if (cl->cpriority != cl->priority2) {
cl->cpriority = cl->priority2;
@@ -604,27 +610,19 @@ static void cbq_ovl_drop(struct cbq_class *cl)
cbq_ovl_classic(cl);
}
-static void cbq_watchdog(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc*)arg;
-
- sch->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(sch->dev);
-}
-
-static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
+static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
+ psched_time_t now)
{
struct cbq_class *cl;
struct cbq_class *cl_prev = q->active[prio];
- unsigned long now = jiffies;
- unsigned long sched = now;
+ psched_time_t sched = now;
if (cl_prev == NULL)
- return now;
+ return 0;
do {
cl = cl_prev->next_alive;
- if ((long)(now - cl->penalized) > 0) {
+ if (now - cl->penalized > 0) {
cl_prev->next_alive = cl->next_alive;
cl->next_alive = NULL;
cl->cpriority = cl->priority;
@@ -640,30 +638,34 @@ static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
}
cl = cl_prev->next_alive;
- } else if ((long)(sched - cl->penalized) > 0)
+ } else if (sched - cl->penalized > 0)
sched = cl->penalized;
} while ((cl_prev = cl) != q->active[prio]);
- return (long)(sched - now);
+ return sched - now;
}
-static void cbq_undelay(unsigned long arg)
+static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
- struct Qdisc *sch = (struct Qdisc*)arg;
- struct cbq_sched_data *q = qdisc_priv(sch);
- long delay = 0;
+ struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
+ delay_timer);
+ struct Qdisc *sch = q->watchdog.qdisc;
+ psched_time_t now;
+ psched_tdiff_t delay = 0;
unsigned pmask;
+ now = psched_get_time();
+
pmask = q->pmask;
q->pmask = 0;
while (pmask) {
int prio = ffz(~pmask);
- long tmp;
+ psched_tdiff_t tmp;
pmask &= ~(1<<prio);
- tmp = cbq_undelay_prio(q, prio);
+ tmp = cbq_undelay_prio(q, prio, now);
if (tmp > 0) {
q->pmask |= 1<<prio;
if (tmp < delay || delay == 0)
@@ -672,12 +674,16 @@ static void cbq_undelay(unsigned long arg)
}
if (delay) {
- q->delay_timer.expires = jiffies + delay;
- add_timer(&q->delay_timer);
+ ktime_t time;
+
+ time = ktime_set(0, 0);
+ time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
+ hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
}
sch->flags &= ~TCQ_F_THROTTLED;
netif_schedule(sch->dev);
+ return HRTIMER_NORESTART;
}
@@ -732,7 +738,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
if (cl && q->toplevel >= borrowed->level) {
if (cl->q->q.qlen > 1) {
do {
- if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
+ if (borrowed->undertime == PSCHED_PASTPERFECT) {
q->toplevel = borrowed->level;
return;
}
@@ -770,7 +776,7 @@ cbq_update(struct cbq_sched_data *q)
idle = (now - last) - last_pktlen/rate
*/
- idle = PSCHED_TDIFF(q->now, cl->last);
+ idle = q->now - cl->last;
if ((unsigned long)idle > 128*1024*1024) {
avgidle = cl->maxidle;
} else {
@@ -814,13 +820,11 @@ cbq_update(struct cbq_sched_data *q)
idle -= L2T(&q->link, len);
idle += L2T(cl, len);
- PSCHED_AUDIT_TDIFF(idle);
-
- PSCHED_TADD2(q->now, idle, cl->undertime);
+ cl->undertime = q->now + idle;
} else {
/* Underlimit */
- PSCHED_SET_PASTPERFECT(cl->undertime);
+ cl->undertime = PSCHED_PASTPERFECT;
if (avgidle > cl->maxidle)
cl->avgidle = cl->maxidle;
else
@@ -841,8 +845,7 @@ cbq_under_limit(struct cbq_class *cl)
if (cl->tparent == NULL)
return cl;
- if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
- !PSCHED_TLESS(q->now, cl->undertime)) {
+ if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
cl->delayed = 0;
return cl;
}
@@ -865,8 +868,7 @@ cbq_under_limit(struct cbq_class *cl)
}
if (cl->level > q->toplevel)
return NULL;
- } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
- PSCHED_TLESS(q->now, cl->undertime));
+ } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
cl->delayed = 0;
return cl;
@@ -1001,8 +1003,8 @@ cbq_dequeue(struct Qdisc *sch)
psched_time_t now;
psched_tdiff_t incr;
- PSCHED_GET_TIME(now);
- incr = PSCHED_TDIFF(now, q->now_rt);
+ now = psched_get_time();
+ incr = now - q->now_rt;
if (q->tx_class) {
psched_tdiff_t incr2;
@@ -1014,12 +1016,12 @@ cbq_dequeue(struct Qdisc *sch)
cbq_time = max(real_time, work);
*/
incr2 = L2T(&q->link, q->tx_len);
- PSCHED_TADD(q->now, incr2);
+ q->now += incr2;
cbq_update(q);
if ((incr -= incr2) < 0)
incr = 0;
}
- PSCHED_TADD(q->now, incr);
+ q->now += incr;
q->now_rt = now;
for (;;) {
@@ -1051,11 +1053,11 @@ cbq_dequeue(struct Qdisc *sch)
*/
if (q->toplevel == TC_CBQ_MAXLEVEL &&
- PSCHED_IS_PASTPERFECT(q->link.undertime))
+ q->link.undertime == PSCHED_PASTPERFECT)
break;
q->toplevel = TC_CBQ_MAXLEVEL;
- PSCHED_SET_PASTPERFECT(q->link.undertime);
+ q->link.undertime = PSCHED_PASTPERFECT;
}
/* No packets in scheduler or nobody wants to give them to us :-(
@@ -1063,13 +1065,9 @@ cbq_dequeue(struct Qdisc *sch)
if (sch->q.qlen) {
sch->qstats.overlimits++;
- if (q->wd_expires) {
- long delay = PSCHED_US2JIFFIE(q->wd_expires);
- if (delay <= 0)
- delay = 1;
- mod_timer(&q->wd_timer, jiffies + delay);
- sch->flags |= TCQ_F_THROTTLED;
- }
+ if (q->wd_expires)
+ qdisc_watchdog_schedule(&q->watchdog,
+ now + q->wd_expires);
}
return NULL;
}
@@ -1276,10 +1274,10 @@ cbq_reset(struct Qdisc* sch)
q->pmask = 0;
q->tx_class = NULL;
q->tx_borrowed = NULL;
- del_timer(&q->wd_timer);
- del_timer(&q->delay_timer);
+ qdisc_watchdog_cancel(&q->watchdog);
+ hrtimer_cancel(&q->delay_timer);
q->toplevel = TC_CBQ_MAXLEVEL;
- PSCHED_GET_TIME(q->now);
+ q->now = psched_get_time();
q->now_rt = q->now;
for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
@@ -1290,7 +1288,7 @@ cbq_reset(struct Qdisc* sch)
qdisc_reset(cl->q);
cl->next_alive = NULL;
- PSCHED_SET_PASTPERFECT(cl->undertime);
+ cl->undertime = PSCHED_PASTPERFECT;
cl->avgidle = cl->maxidle;
cl->deficit = cl->quantum;
cl->cpriority = cl->priority;
@@ -1379,7 +1377,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
default:
return -EINVAL;
}
- cl->penalty = (ovl->penalty*HZ)/1000;
+ cl->penalty = ovl->penalty;
return 0;
}
@@ -1446,14 +1444,11 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
q->link.minidle = -0x7FFFFFFF;
q->link.stats_lock = &sch->dev->queue_lock;
- init_timer(&q->wd_timer);
- q->wd_timer.data = (unsigned long)sch;
- q->wd_timer.function = cbq_watchdog;
- init_timer(&q->delay_timer);
- q->delay_timer.data = (unsigned long)sch;
+ qdisc_watchdog_init(&q->watchdog, sch);
+ hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
q->delay_timer.function = cbq_undelay;
q->toplevel = TC_CBQ_MAXLEVEL;
- PSCHED_GET_TIME(q->now);
+ q->now = psched_get_time();
q->now_rt = q->now;
cbq_link_class(&q->link);
@@ -1467,19 +1462,19 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_lssopt opt;
opt.flags = 0;
@@ -1498,13 +1493,13 @@ static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_wrropt opt;
opt.flags = 0;
@@ -1516,30 +1511,30 @@ static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_ovl opt;
opt.strategy = cl->ovl_strategy;
opt.priority2 = cl->priority2+1;
opt.pad = 0;
- opt.penalty = (cl->penalty*1000)/HZ;
+ opt.penalty = cl->penalty;
RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_fopt opt;
if (cl->split || cl->defmap) {
@@ -1551,14 +1546,14 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_police opt;
if (cl->police) {
@@ -1570,7 +1565,7 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
#endif
@@ -1592,18 +1587,18 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
rta = (struct rtattr*)b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
if (cbq_dump_attr(skb, &q->link) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1621,7 +1616,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct cbq_class *cl = (struct cbq_class*)arg;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
if (cl->tparent)
@@ -1635,11 +1630,11 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
if (cbq_dump_attr(skb, cl) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1654,8 +1649,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
cl->xstats.avgidle = cl->avgidle;
cl->xstats.undertime = 0;
- if (!PSCHED_IS_PASTPERFECT(cl->undertime))
- cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
+ if (cl->undertime != PSCHED_PASTPERFECT)
+ cl->xstats.undertime = cl->undertime - q->now;
if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
@@ -1722,23 +1717,13 @@ static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
return 0;
}
-static void cbq_destroy_filters(struct cbq_class *cl)
-{
- struct tcf_proto *tp;
-
- while ((tp = cl->filter_list) != NULL) {
- cl->filter_list = tp->next;
- tcf_destroy(tp);
- }
-}
-
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(sch);
BUG_TRAP(!cl->filters);
- cbq_destroy_filters(cl);
+ tcf_destroy_chain(cl->filter_list);
qdisc_destroy(cl->q);
qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
@@ -1765,7 +1750,7 @@ cbq_destroy(struct Qdisc* sch)
*/
for (h = 0; h < 16; h++)
for (cl = q->classes[h]; cl; cl = cl->next)
- cbq_destroy_filters(cl);
+ tcf_destroy_chain(cl->filter_list);
for (h = 0; h < 16; h++) {
struct cbq_class *next;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 96324cf4e6a9..3c6fd181263f 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -216,17 +216,17 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
/* FIXME: Safe with non-linear skbs? --RR */
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
- skb->tc_index = ipv4_get_dsfield(skb->nh.iph)
+ skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
& ~INET_ECN_MASK;
break;
case __constant_htons(ETH_P_IPV6):
- skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h)
+ skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
& ~INET_ECN_MASK;
break;
default:
skb->tc_index = 0;
break;
- };
+ }
}
if (TC_H_MAJ(skb->priority) == sch->handle)
@@ -257,7 +257,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
if (p->default_index != NO_DEFAULT_INDEX)
skb->tc_index = p->default_index;
break;
- };
+ }
}
err = p->q->enqueue(skb,p->q);
@@ -292,11 +292,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
- ipv4_change_dsfield(skb->nh.iph, p->mask[index],
+ ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
p->value[index]);
break;
case __constant_htons(ETH_P_IPV6):
- ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
+ ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
p->value[index]);
break;
default:
@@ -310,7 +310,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
"unsupported protocol %d\n",
ntohs(skb->protocol));
break;
- };
+ }
return skb;
}
@@ -412,16 +412,10 @@ static void dsmark_reset(struct Qdisc *sch)
static void dsmark_destroy(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = PRIV(sch);
- struct tcf_proto *tp;
DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
- while (p->filter_list) {
- tp = p->filter_list;
- p->filter_list = tp->next;
- tcf_destroy(tp);
- }
-
+ tcf_destroy_chain(p->filter_list);
qdisc_destroy(p->q);
kfree(p->mask);
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 52eb3439d7c6..3385ee592541 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -36,34 +36,27 @@
/* Main transmission queue. */
-/* Main qdisc structure lock.
-
- However, modifications
- to data, participating in scheduling must be additionally
- protected with dev->queue_lock spinlock.
-
- The idea is the following:
- - enqueue, dequeue are serialized via top level device
- spinlock dev->queue_lock.
- - tree walking is protected by read_lock(qdisc_tree_lock)
- and this lock is used only in process context.
- - updates to tree are made only under rtnl semaphore,
- hence this lock may be made without local bh disabling.
-
- qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
+/* Modifications to data participating in scheduling must be protected with
+ * dev->queue_lock spinlock.
+ *
+ * The idea is the following:
+ * - enqueue, dequeue are serialized via top level device
+ * spinlock dev->queue_lock.
+ * - ingress filtering is serialized via top level device
+ * spinlock dev->ingress_lock.
+ * - updates to tree and tree walking are only done under the rtnl mutex.
*/
-DEFINE_RWLOCK(qdisc_tree_lock);
void qdisc_lock_tree(struct net_device *dev)
{
- write_lock(&qdisc_tree_lock);
spin_lock_bh(&dev->queue_lock);
+ spin_lock(&dev->ingress_lock);
}
void qdisc_unlock_tree(struct net_device *dev)
{
+ spin_unlock(&dev->ingress_lock);
spin_unlock_bh(&dev->queue_lock);
- write_unlock(&qdisc_tree_lock);
}
/*
@@ -442,7 +435,6 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
sch->dequeue = ops->dequeue;
sch->dev = dev;
dev_hold(dev);
- sch->stats_lock = &dev->queue_lock;
atomic_set(&sch->refcnt, 1);
return sch;
@@ -458,6 +450,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
sch = qdisc_alloc(dev, ops);
if (IS_ERR(sch))
goto errout;
+ sch->stats_lock = &dev->queue_lock;
sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL) == 0)
@@ -528,15 +521,11 @@ void dev_activate(struct net_device *dev)
printk(KERN_INFO "%s: activation failed\n", dev->name);
return;
}
- write_lock(&qdisc_tree_lock);
list_add_tail(&qdisc->list, &dev->qdisc_list);
- write_unlock(&qdisc_tree_lock);
} else {
qdisc = &noqueue_qdisc;
}
- write_lock(&qdisc_tree_lock);
dev->qdisc_sleeping = qdisc;
- write_unlock(&qdisc_tree_lock);
}
if (!netif_carrier_ok(dev))
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 396deb71480f..9d124c4ee3a7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -59,13 +59,13 @@
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/slab.h>
-#include <linux/timer.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <asm/system.h>
@@ -192,23 +192,9 @@ struct hfsc_sched
struct list_head droplist; /* active leaf class list (for
dropping) */
struct sk_buff_head requeue; /* requeued packet */
- struct timer_list wd_timer; /* watchdog timer */
+ struct qdisc_watchdog watchdog; /* watchdog timer */
};
-/*
- * macros
- */
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-#include <linux/time.h>
-#undef PSCHED_GET_TIME
-#define PSCHED_GET_TIME(stamp) \
-do { \
- struct timeval tv; \
- do_gettimeofday(&tv); \
- (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \
-} while (0)
-#endif
-
#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
@@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl)
* ism: (psched_us/byte) << ISM_SHIFT
* dx: psched_us
*
- * Clock source resolution (CONFIG_NET_SCH_CLK_*)
- * JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
- * CPU: resolution is between 0.5us and 1us.
- * GETTIMEOFDAY: resolution is exactly 1us.
+ * The clock source resolution with ktime is 1.024us.
*
* sm and ism are scaled in order to keep effective digits.
* SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
* digits in decimal using the following table.
*
- * Note: We can afford the additional accuracy (altq hfsc keeps at most
- * 3 effective digits) thanks to the fact that linux clock is bounded
- * much more tightly.
- *
* bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
* ------------+-------------------------------------------------------
- * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-e 62500e-3
- * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3
- * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3
+ * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3
*
- * 0.5us/byte 160 16 1.6 0.16 0.016
- * us/byte 80 8 0.8 0.08 0.008
- * 1.27us/byte 63 6.3 0.63 0.063 0.0063
+ * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125
*/
#define SM_SHIFT 20
#define ISM_SHIFT 18
@@ -460,8 +435,8 @@ m2sm(u32 m)
u64 sm;
sm = ((u64)m << SM_SHIFT);
- sm += PSCHED_JIFFIE2US(HZ) - 1;
- do_div(sm, PSCHED_JIFFIE2US(HZ));
+ sm += PSCHED_TICKS_PER_SEC - 1;
+ do_div(sm, PSCHED_TICKS_PER_SEC);
return sm;
}
@@ -474,7 +449,7 @@ m2ism(u32 m)
if (m == 0)
ism = HT_INFINITY;
else {
- ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT);
+ ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT);
ism += m - 1;
do_div(ism, m);
}
@@ -487,7 +462,7 @@ d2dx(u32 d)
{
u64 dx;
- dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
+ dx = ((u64)d * PSCHED_TICKS_PER_SEC);
dx += USEC_PER_SEC - 1;
do_div(dx, USEC_PER_SEC);
return dx;
@@ -499,7 +474,7 @@ sm2m(u64 sm)
{
u64 m;
- m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT;
+ m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT;
return (u32)m;
}
@@ -510,7 +485,7 @@ dx2d(u64 dx)
u64 d;
d = dx * USEC_PER_SEC;
- do_div(d, PSCHED_JIFFIE2US(HZ));
+ do_div(d, PSCHED_TICKS_PER_SEC);
return (u32)d;
}
@@ -654,9 +629,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
static void
init_ed(struct hfsc_class *cl, unsigned int next_len)
{
- u64 cur_time;
-
- PSCHED_GET_TIME(cur_time);
+ u64 cur_time = psched_get_time();
/* update the deadline curve */
rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
@@ -779,7 +752,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
if (cl->cl_flags & HFSC_USC) {
/* class has upper limit curve */
if (cur_time == 0)
- PSCHED_GET_TIME(cur_time);
+ cur_time = psched_get_time();
/* update the ulimit curve */
rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
@@ -1063,7 +1036,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
return -EINVAL;
}
- PSCHED_GET_TIME(cur_time);
+ cur_time = psched_get_time();
sch_tree_lock(sch);
if (rsc != NULL)
@@ -1149,22 +1122,11 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
}
static void
-hfsc_destroy_filters(struct tcf_proto **fl)
-{
- struct tcf_proto *tp;
-
- while ((tp = *fl) != NULL) {
- *fl = tp->next;
- tcf_destroy(tp);
- }
-}
-
-static void
hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
{
struct hfsc_sched *q = qdisc_priv(sch);
- hfsc_destroy_filters(&cl->filter_list);
+ tcf_destroy_chain(cl->filter_list);
qdisc_destroy(cl->qdisc);
#ifdef CONFIG_NET_ESTIMATOR
gen_kill_estimator(&cl->bstats, &cl->rate_est);
@@ -1184,10 +1146,12 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
- list_del(&cl->hlist);
list_del(&cl->siblings);
hfsc_adjust_levels(cl->cl_parent);
+
hfsc_purge_queue(sch, cl);
+ list_del(&cl->hlist);
+
if (--cl->refcnt == 0)
hfsc_destroy_class(sch, cl);
@@ -1387,7 +1351,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
struct tcmsg *tcm)
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta = (struct rtattr *)b;
tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
@@ -1398,11 +1362,11 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
if (hfsc_dump_curves(skb, cl) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1457,21 +1421,11 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
static void
-hfsc_watchdog(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc *)arg;
-
- sch->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(sch->dev);
-}
-
-static void
-hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
+hfsc_schedule_watchdog(struct Qdisc *sch)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl;
u64 next_time = 0;
- long delay;
if ((cl = eltree_get_minel(q)) != NULL)
next_time = cl->cl_e;
@@ -1480,11 +1434,7 @@ hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
next_time = q->root.cl_cfmin;
}
WARN_ON(next_time == 0);
- delay = next_time - cur_time;
- delay = PSCHED_US2JIFFIE(delay);
-
- sch->flags |= TCQ_F_THROTTLED;
- mod_timer(&q->wd_timer, jiffies + delay);
+ qdisc_watchdog_schedule(&q->watchdog, next_time);
}
static int
@@ -1521,9 +1471,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
- init_timer(&q->wd_timer);
- q->wd_timer.function = hfsc_watchdog;
- q->wd_timer.data = (unsigned long)sch;
+ qdisc_watchdog_init(&q->watchdog, sch);
return 0;
}
@@ -1593,8 +1541,7 @@ hfsc_reset_qdisc(struct Qdisc *sch)
__skb_queue_purge(&q->requeue);
q->eligible = RB_ROOT;
INIT_LIST_HEAD(&q->droplist);
- del_timer(&q->wd_timer);
- sch->flags &= ~TCQ_F_THROTTLED;
+ qdisc_watchdog_cancel(&q->watchdog);
sch->q.qlen = 0;
}
@@ -1610,14 +1557,14 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
hfsc_destroy_class(sch, cl);
}
__skb_queue_purge(&q->requeue);
- del_timer(&q->wd_timer);
+ qdisc_watchdog_cancel(&q->watchdog);
}
static int
hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
{
struct hfsc_sched *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_hfsc_qopt qopt;
qopt.defcls = q->defcls;
@@ -1625,7 +1572,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1679,7 +1626,7 @@ hfsc_dequeue(struct Qdisc *sch)
if ((skb = __skb_dequeue(&q->requeue)))
goto out;
- PSCHED_GET_TIME(cur_time);
+ cur_time = psched_get_time();
/*
* if there are eligible classes, use real-time criteria.
@@ -1696,7 +1643,7 @@ hfsc_dequeue(struct Qdisc *sch)
cl = vttree_get_minvt(&q->root, cur_time);
if (cl == NULL) {
sch->qstats.overlimits++;
- hfsc_schedule_watchdog(sch, cur_time);
+ hfsc_schedule_watchdog(sch);
return NULL;
}
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 97cbb9aec946..99bcec8dd04c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -50,6 +50,7 @@
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rbtree.h>
@@ -128,7 +129,7 @@ struct htb_class {
} un;
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
struct rb_node pq_node; /* node for event queue */
- unsigned long pq_key; /* the same type as jiffies global */
+ psched_time_t pq_key;
int prio_activity; /* for which prios are we active */
enum htb_cmode cmode; /* current mode of the class */
@@ -179,10 +180,7 @@ struct htb_sched {
struct rb_root wait_pq[TC_HTB_MAXDEPTH];
/* time of nearest event per level (row) */
- unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
-
- /* cached value of jiffies in dequeue */
- unsigned long jiffies;
+ psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
/* whether we hit non-work conserving class during this dequeue; we use */
int nwc_hit; /* this to disable mindelay complaint in dequeue */
@@ -195,7 +193,7 @@ struct htb_sched {
int rate2quantum; /* quant = rate / rate2quantum */
psched_time_t now; /* cached dequeue time */
- struct timer_list timer; /* send delay timer */
+ struct qdisc_watchdog watchdog;
#ifdef HTB_RATECM
struct timer_list rttim; /* rate computer timer */
int recmp_bucket; /* which hash bucket to recompute next */
@@ -342,19 +340,19 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
{
struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
- cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
- if (cl->pq_key == q->jiffies)
+ cl->pq_key = q->now + delay;
+ if (cl->pq_key == q->now)
cl->pq_key++;
/* update the nearest event cache */
- if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
+ if (q->near_ev_cache[cl->level] > cl->pq_key)
q->near_ev_cache[cl->level] = cl->pq_key;
while (*p) {
struct htb_class *c;
parent = *p;
c = rb_entry(parent, struct htb_class, pq_node);
- if (time_after_eq(cl->pq_key, c->pq_key))
+ if (cl->pq_key >= c->pq_key)
p = &parent->rb_right;
else
p = &parent->rb_left;
@@ -679,14 +677,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
-static void htb_timer(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc *)arg;
- sch->flags &= ~TCQ_F_THROTTLED;
- wmb();
- netif_schedule(sch->dev);
-}
-
#ifdef HTB_RATECM
#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
static void htb_rate_timer(unsigned long arg)
@@ -739,7 +729,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
cl->T = toks
while (cl) {
- diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+ diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
if (cl->level >= level) {
if (cl->level == level)
cl->xstats.lends++;
@@ -778,11 +768,11 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
/**
* htb_do_events - make mode changes to classes at the level
*
- * Scans event queue for pending events and applies them. Returns jiffies to
+ * Scans event queue for pending events and applies them. Returns time of
* next pending event (0 for no event in pq).
- * Note: Aplied are events whose have cl->pq_key <= jiffies.
+ * Note: Applied are events whose have cl->pq_key <= q->now.
*/
-static long htb_do_events(struct htb_sched *q, int level)
+static psched_time_t htb_do_events(struct htb_sched *q, int level)
{
int i;
@@ -795,18 +785,18 @@ static long htb_do_events(struct htb_sched *q, int level)
return 0;
cl = rb_entry(p, struct htb_class, pq_node);
- if (time_after(cl->pq_key, q->jiffies)) {
- return cl->pq_key - q->jiffies;
- }
+ if (cl->pq_key > q->now)
+ return cl->pq_key;
+
htb_safe_rb_erase(p, q->wait_pq + level);
- diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+ diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
htb_change_class_mode(q, cl, &diff);
if (cl->cmode != HTB_CAN_SEND)
htb_add_to_wait_tree(q, cl, diff);
}
if (net_ratelimit())
printk(KERN_WARNING "htb: too many events !\n");
- return HZ / 10;
+ return q->now + PSCHED_TICKS_PER_SEC / 10;
}
/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
@@ -958,30 +948,12 @@ next:
return skb;
}
-static void htb_delay_by(struct Qdisc *sch, long delay)
-{
- struct htb_sched *q = qdisc_priv(sch);
- if (delay <= 0)
- delay = 1;
- if (unlikely(delay > 5 * HZ)) {
- if (net_ratelimit())
- printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
- delay = 5 * HZ;
- }
- /* why don't use jiffies here ? because expires can be in past */
- mod_timer(&q->timer, q->jiffies + delay);
- sch->flags |= TCQ_F_THROTTLED;
- sch->qstats.overlimits++;
-}
-
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
struct sk_buff *skb = NULL;
struct htb_sched *q = qdisc_priv(sch);
int level;
- long min_delay;
-
- q->jiffies = jiffies;
+ psched_time_t next_event;
/* try to dequeue direct packets as high prio (!) to minimize cpu work */
skb = __skb_dequeue(&q->direct_queue);
@@ -993,23 +965,25 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
if (!sch->q.qlen)
goto fin;
- PSCHED_GET_TIME(q->now);
+ q->now = psched_get_time();
- min_delay = LONG_MAX;
+ next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
q->nwc_hit = 0;
for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
/* common case optimization - skip event handler quickly */
int m;
- long delay;
- if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
- delay = htb_do_events(q, level);
- q->near_ev_cache[level] =
- q->jiffies + (delay ? delay : HZ);
+ psched_time_t event;
+
+ if (q->now >= q->near_ev_cache[level]) {
+ event = htb_do_events(q, level);
+ q->near_ev_cache[level] = event ? event :
+ PSCHED_TICKS_PER_SEC;
} else
- delay = q->near_ev_cache[level] - q->jiffies;
+ event = q->near_ev_cache[level];
+
+ if (event && next_event > event)
+ next_event = event;
- if (delay && min_delay > delay)
- min_delay = delay;
m = ~q->row_mask[level];
while (m != (int)(-1)) {
int prio = ffz(m);
@@ -1022,7 +996,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
}
}
}
- htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
+ sch->qstats.overlimits++;
+ qdisc_watchdog_schedule(&q->watchdog, next_event);
fin:
return skb;
}
@@ -1075,8 +1050,7 @@ static void htb_reset(struct Qdisc *sch)
}
}
- sch->flags &= ~TCQ_F_THROTTLED;
- del_timer(&q->timer);
+ qdisc_watchdog_cancel(&q->watchdog);
__skb_queue_purge(&q->direct_queue);
sch->q.qlen = 0;
memset(q->row, 0, sizeof(q->row));
@@ -1113,14 +1087,12 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
for (i = 0; i < TC_HTB_NUMPRIO; i++)
INIT_LIST_HEAD(q->drops + i);
- init_timer(&q->timer);
+ qdisc_watchdog_init(&q->watchdog, sch);
skb_queue_head_init(&q->direct_queue);
q->direct_qlen = sch->dev->tx_queue_len;
if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
q->direct_qlen = 2;
- q->timer.function = htb_timer;
- q->timer.data = (unsigned long)sch;
#ifdef HTB_RATECM
init_timer(&q->rttim);
@@ -1139,7 +1111,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct htb_sched *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
struct tc_htb_glob gopt;
spin_lock_bh(&sch->dev->queue_lock);
@@ -1152,12 +1124,12 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
rta = (struct rtattr *)b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
spin_unlock_bh(&sch->dev->queue_lock);
return skb->len;
rtattr_failure:
spin_unlock_bh(&sch->dev->queue_lock);
- skb_trim(skb, skb->tail - skb->data);
+ nlmsg_trim(skb, skb_tail_pointer(skb));
return -1;
}
@@ -1165,7 +1137,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct htb_class *cl = (struct htb_class *)arg;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
struct tc_htb_opt opt;
@@ -1188,12 +1160,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
opt.prio = cl->un.leaf.prio;
opt.level = cl->level;
RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
spin_unlock_bh(&sch->dev->queue_lock);
return skb->len;
rtattr_failure:
spin_unlock_bh(&sch->dev->queue_lock);
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1264,16 +1236,6 @@ static unsigned long htb_get(struct Qdisc *sch, u32 classid)
return (unsigned long)cl;
}
-static void htb_destroy_filters(struct tcf_proto **fl)
-{
- struct tcf_proto *tp;
-
- while ((tp = *fl) != NULL) {
- *fl = tp->next;
- tcf_destroy(tp);
- }
-}
-
static inline int htb_parent_last_child(struct htb_class *cl)
{
if (!cl->parent)
@@ -1302,7 +1264,7 @@ static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q)
parent->un.leaf.prio = parent->prio;
parent->tokens = parent->buffer;
parent->ctokens = parent->cbuffer;
- PSCHED_GET_TIME(parent->t_c);
+ parent->t_c = psched_get_time();
parent->cmode = HTB_CAN_SEND;
}
@@ -1317,7 +1279,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
qdisc_put_rtab(cl->rate);
qdisc_put_rtab(cl->ceil);
- htb_destroy_filters(&cl->filter_list);
+ tcf_destroy_chain(cl->filter_list);
while (!list_empty(&cl->children))
htb_destroy_class(sch, list_entry(cl->children.next,
@@ -1341,7 +1303,7 @@ static void htb_destroy(struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
- del_timer_sync(&q->timer);
+ qdisc_watchdog_cancel(&q->watchdog);
#ifdef HTB_RATECM
del_timer_sync(&q->rttim);
#endif
@@ -1349,7 +1311,7 @@ static void htb_destroy(struct Qdisc *sch)
and surprisingly it worked in 2.4. But it must precede it
because filter need its target class alive to be able to call
unbind_filter on it (without Oops). */
- htb_destroy_filters(&q->filter_list);
+ tcf_destroy_chain(q->filter_list);
while (!list_empty(&q->root))
htb_destroy_class(sch, list_entry(q->root.next,
@@ -1380,15 +1342,15 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
- /* delete from hash and active; remainder in destroy_class */
- hlist_del_init(&cl->hlist);
-
if (!cl->level) {
qlen = cl->un.leaf.q->q.qlen;
qdisc_reset(cl->un.leaf.q);
qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
}
+ /* delete from hash and active; remainder in destroy_class */
+ hlist_del_init(&cl->hlist);
+
if (cl->prio_activity)
htb_deactivate(q, cl);
@@ -1498,8 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* set class to be in HTB_CAN_SEND state */
cl->tokens = hopt->buffer;
cl->ctokens = hopt->cbuffer;
- cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */
- PSCHED_GET_TIME(cl->t_c);
+ cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */
+ cl->t_c = psched_get_time();
cl->cmode = HTB_CAN_SEND;
/* attach to the hash list and parent's family */
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index cfe070ee6ee3..f8b9f1cdf738 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -16,6 +16,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter.h>
#include <linux/smp.h>
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <asm/byteorder.h>
#include <asm/uaccess.h>
@@ -169,7 +170,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
skb->tc_index = TC_H_MIN(res.classid);
result = TC_ACT_OK;
break;
- };
+ }
/* backward compat */
#else
#ifdef CONFIG_NET_CLS_POLICE
@@ -186,7 +187,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
sch->bstats.bytes += skb->len;
result = NF_ACCEPT;
break;
- };
+ }
#else
D2PRINTK("Overriding result to ACCEPT\n");
@@ -247,16 +248,11 @@ ing_hook(unsigned int hook, struct sk_buff **pskb,
skb->dev ? (*pskb)->dev->name : "(no dev)",
skb->len);
-/*
-revisit later: Use a private since lock dev->queue_lock is also
-used on the egress (might slow things for an iota)
-*/
-
if (dev->qdisc_ingress) {
- spin_lock(&dev->queue_lock);
+ spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
fwres = q->enqueue(skb, q);
- spin_unlock(&dev->queue_lock);
+ spin_unlock(&dev->ingress_lock);
}
return fwres;
@@ -345,14 +341,9 @@ static void ingress_reset(struct Qdisc *sch)
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_qdisc_data *p = PRIV(sch);
- struct tcf_proto *tp;
DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
- while (p->filter_list) {
- tp = p->filter_list;
- p->filter_list = tp->next;
- tcf_destroy(tp);
- }
+ tcf_destroy_chain(p->filter_list);
#if 0
/* for future use */
qdisc_destroy(p->q);
@@ -362,16 +353,16 @@ static void ingress_destroy(struct Qdisc *sch)
static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
rta = (struct rtattr *) b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1ccbfb55b0b8..5d9d8bc9cc3a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#define VERSION "1.2"
@@ -54,21 +55,22 @@
struct netem_sched_data {
struct Qdisc *qdisc;
- struct timer_list timer;
+ struct qdisc_watchdog watchdog;
+
+ psched_tdiff_t latency;
+ psched_tdiff_t jitter;
- u32 latency;
u32 loss;
u32 limit;
u32 counter;
u32 gap;
- u32 jitter;
u32 duplicate;
u32 reorder;
u32 corrupt;
struct crndstate {
- unsigned long last;
- unsigned long rho;
+ u32 last;
+ u32 rho;
} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
struct disttable {
@@ -95,12 +97,12 @@ static void init_crandom(struct crndstate *state, unsigned long rho)
* Next number depends on last value.
* rho is scaled to avoid floating point.
*/
-static unsigned long get_crandom(struct crndstate *state)
+static u32 get_crandom(struct crndstate *state)
{
u64 value, rho;
unsigned long answer;
- if (state->rho == 0) /* no correllation */
+ if (state->rho == 0) /* no correlation */
return net_random();
value = net_random();
@@ -114,11 +116,13 @@ static unsigned long get_crandom(struct crndstate *state)
* std deviation sigma. Uses table lookup to approximate the desired
* distribution, and a uniformly-distributed pseudo-random source.
*/
-static long tabledist(unsigned long mu, long sigma,
- struct crndstate *state, const struct disttable *dist)
+static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
+ struct crndstate *state,
+ const struct disttable *dist)
{
- long t, x;
- unsigned long rnd;
+ psched_tdiff_t x;
+ long t;
+ u32 rnd;
if (sigma == 0)
return mu;
@@ -213,8 +217,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
delay = tabledist(q->latency, q->jitter,
&q->delay_cor, q->delay_dist);
- PSCHED_GET_TIME(now);
- PSCHED_TADD2(now, delay, cb->time_to_send);
+ now = psched_get_time();
+ cb->time_to_send = now + delay;
++q->counter;
ret = q->qdisc->enqueue(skb, q->qdisc);
} else {
@@ -222,7 +226,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* Do re-ordering by putting one out of N packets at the front
* of the queue.
*/
- PSCHED_GET_TIME(cb->time_to_send);
+ cb->time_to_send = psched_get_time();
q->counter = 0;
ret = q->qdisc->ops->requeue(skb, q->qdisc);
}
@@ -269,55 +273,43 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
+ smp_mb();
+ if (sch->flags & TCQ_F_THROTTLED)
+ return NULL;
+
skb = q->qdisc->dequeue(q->qdisc);
if (skb) {
const struct netem_skb_cb *cb
= (const struct netem_skb_cb *)skb->cb;
- psched_time_t now;
+ psched_time_t now = psched_get_time();
/* if more time remaining? */
- PSCHED_GET_TIME(now);
-
- if (PSCHED_TLESS(cb->time_to_send, now)) {
+ if (cb->time_to_send <= now) {
pr_debug("netem_dequeue: return skb=%p\n", skb);
sch->q.qlen--;
- sch->flags &= ~TCQ_F_THROTTLED;
return skb;
- } else {
- psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
-
- if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
- qdisc_tree_decrease_qlen(q->qdisc, 1);
- sch->qstats.drops++;
- printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
- q->qdisc->ops->id);
- }
+ }
- mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
- sch->flags |= TCQ_F_THROTTLED;
+ if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
+ qdisc_tree_decrease_qlen(q->qdisc, 1);
+ sch->qstats.drops++;
+ printk(KERN_ERR "netem: %s could not requeue\n",
+ q->qdisc->ops->id);
}
+
+ qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
}
return NULL;
}
-static void netem_watchdog(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc *)arg;
-
- pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
- sch->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(sch->dev);
-}
-
static void netem_reset(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
sch->q.qlen = 0;
- sch->flags &= ~TCQ_F_THROTTLED;
- del_timer_sync(&q->timer);
+ qdisc_watchdog_cancel(&q->watchdog);
}
/* Pass size change message down to embedded FIFO */
@@ -438,10 +430,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
q->loss = qopt->loss;
q->duplicate = qopt->duplicate;
- /* for compatiablity with earlier versions.
- * if gap is set, need to assume 100% probablity
+ /* for compatibility with earlier versions.
+ * if gap is set, need to assume 100% probability
*/
- q->reorder = ~0;
+ if (q->gap)
+ q->reorder = ~0;
/* Handle nested options after initial queue options.
* Should have put all options in nested format but too late now.
@@ -487,22 +480,28 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
*/
struct fifo_sched_data {
u32 limit;
+ psched_time_t oldest;
};
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
struct sk_buff_head *list = &sch->q;
- const struct netem_skb_cb *ncb
- = (const struct netem_skb_cb *)nskb->cb;
+ psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
struct sk_buff *skb;
if (likely(skb_queue_len(list) < q->limit)) {
+ /* Optimize for add at tail */
+ if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
+ q->oldest = tnext;
+ return qdisc_enqueue_tail(nskb, sch);
+ }
+
skb_queue_reverse_walk(list, skb) {
const struct netem_skb_cb *cb
= (const struct netem_skb_cb *)skb->cb;
- if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send))
+ if (tnext >= cb->time_to_send)
break;
}
@@ -515,7 +514,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
- return qdisc_drop(nskb, sch);
+ return qdisc_reshape_fail(nskb, sch);
}
static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
@@ -531,6 +530,7 @@ static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
} else
q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
+ q->oldest = PSCHED_PASTPERFECT;
return 0;
}
@@ -567,9 +567,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
if (!opt)
return -EINVAL;
- init_timer(&q->timer);
- q->timer.function = netem_watchdog;
- q->timer.data = (unsigned long) sch;
+ qdisc_watchdog_init(&q->watchdog, sch);
q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
TC_H_MAKE(sch->handle, 1));
@@ -590,7 +588,7 @@ static void netem_destroy(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- del_timer_sync(&q->timer);
+ qdisc_watchdog_cancel(&q->watchdog);
qdisc_destroy(q->qdisc);
kfree(q->delay_dist);
}
@@ -598,7 +596,7 @@ static void netem_destroy(struct Qdisc *sch)
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
const struct netem_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta = (struct rtattr *) b;
struct tc_netem_qopt qopt;
struct tc_netem_corr cor;
@@ -626,12 +624,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
corrupt.correlation = q->corrupt_cor.rho;
RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index de889f23f22a..269a6e17c6c4 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -32,6 +32,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
@@ -61,7 +62,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS;
case TC_ACT_SHOT:
return NULL;
- };
+ }
if (!q->filter_list ) {
#else
@@ -188,13 +189,8 @@ prio_destroy(struct Qdisc* sch)
{
int prio;
struct prio_sched_data *q = qdisc_priv(sch);
- struct tcf_proto *tp;
-
- while ((tp = q->filter_list) != NULL) {
- q->filter_list = tp->next;
- tcf_destroy(tp);
- }
+ tcf_destroy_chain(q->filter_list);
for (prio=0; prio<q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -271,7 +267,7 @@ static int prio_init(struct Qdisc *sch, struct rtattr *opt)
static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct prio_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_prio_qopt opt;
opt.bands = q->bands;
@@ -280,7 +276,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 66f32051a99b..96dfdf78d32c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -30,6 +30,7 @@
#include <linux/notifier.h>
#include <linux/init.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <linux/ipv6.h>
#include <net/route.h>
#include <linux/skbuff.h>
@@ -137,7 +138,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
{
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
h = iph->daddr;
h2 = iph->saddr^iph->protocol;
if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -152,7 +153,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
}
case __constant_htons(ETH_P_IPV6):
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
h = iph->daddr.s6_addr32[3];
h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
if (iph->nexthdr == IPPROTO_TCP ||
@@ -461,7 +462,7 @@ static void sfq_destroy(struct Qdisc *sch)
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_sfq_qopt opt;
opt.quantum = q->quantum;
@@ -476,7 +477,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 85da8daa61d2..53862953baaf 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -32,6 +32,7 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -127,8 +128,8 @@ struct tbf_sched_data
long tokens; /* Current number of B tokens */
long ptokens; /* Current number of P tokens */
psched_time_t t_c; /* Time check-point */
- struct timer_list wd_timer; /* Watchdog timer */
struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */
+ struct qdisc_watchdog watchdog; /* Watchdog timer */
};
#define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log])
@@ -185,14 +186,6 @@ static unsigned int tbf_drop(struct Qdisc* sch)
return len;
}
-static void tbf_watchdog(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc*)arg;
-
- sch->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(sch->dev);
-}
-
static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -202,13 +195,12 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
if (skb) {
psched_time_t now;
- long toks, delay;
+ long toks;
long ptoks = 0;
unsigned int len = skb->len;
- PSCHED_GET_TIME(now);
-
- toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer);
+ now = psched_get_time();
+ toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
if (q->P_tab) {
ptoks = toks + q->ptokens;
@@ -230,12 +222,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
return skb;
}
- delay = PSCHED_US2JIFFIE(max_t(long, -toks, -ptoks));
-
- if (delay == 0)
- delay = 1;
-
- mod_timer(&q->wd_timer, jiffies+delay);
+ qdisc_watchdog_schedule(&q->watchdog,
+ now + max_t(long, -toks, -ptoks));
/* Maybe we have a shorter packet in the queue,
which can be sent now. It sounds cool,
@@ -254,7 +242,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
sch->qstats.drops++;
}
- sch->flags |= TCQ_F_THROTTLED;
sch->qstats.overlimits++;
}
return NULL;
@@ -266,11 +253,10 @@ static void tbf_reset(struct Qdisc* sch)
qdisc_reset(q->qdisc);
sch->q.qlen = 0;
- PSCHED_GET_TIME(q->t_c);
+ q->t_c = psched_get_time();
q->tokens = q->buffer;
q->ptokens = q->mtu;
- sch->flags &= ~TCQ_F_THROTTLED;
- del_timer(&q->wd_timer);
+ qdisc_watchdog_cancel(&q->watchdog);
}
static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
@@ -377,11 +363,8 @@ static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
if (opt == NULL)
return -EINVAL;
- PSCHED_GET_TIME(q->t_c);
- init_timer(&q->wd_timer);
- q->wd_timer.function = tbf_watchdog;
- q->wd_timer.data = (unsigned long)sch;
-
+ q->t_c = psched_get_time();
+ qdisc_watchdog_init(&q->watchdog, sch);
q->qdisc = &noop_qdisc;
return tbf_change(sch, opt);
@@ -391,7 +374,7 @@ static void tbf_destroy(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
- del_timer(&q->wd_timer);
+ qdisc_watchdog_cancel(&q->watchdog);
if (q->P_tab)
qdisc_put_rtab(q->P_tab);
@@ -404,7 +387,7 @@ static void tbf_destroy(struct Qdisc *sch)
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct tbf_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
struct tc_tbf_qopt opt;
@@ -420,12 +403,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.mtu = q->mtu;
opt.buffer = q->buffer;
RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 587123c61af9..d24914db7861 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -323,7 +323,7 @@ restart:
nores = 1;
break;
}
- __skb_pull(skb, skb->nh.raw - skb->data);
+ __skb_pull(skb, skb_network_offset(skb));
} while ((q = NEXT_SLAVE(q)) != start);
if (nores && skb_res == NULL) {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index fa82b73c965b..df94e3cdfba3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -143,7 +143,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Initialize the maximum mumber of new data packets that can be sent
* in a burst.
*/
- asoc->max_burst = sctp_max_burst;
+ asoc->max_burst = sp->max_burst;
/* initialize association timers */
asoc->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0;
@@ -714,8 +714,16 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
/* Record the transition on the transport. */
switch (command) {
case SCTP_TRANSPORT_UP:
+ /* If we are moving from UNCONFIRMED state due
+ * to heartbeat success, report the SCTP_ADDR_CONFIRMED
+ * state to the user, otherwise report SCTP_ADDR_AVAILABLE.
+ */
+ if (SCTP_UNCONFIRMED == transport->state &&
+ SCTP_HEARTBEAT_SUCCESS == error)
+ spc_state = SCTP_ADDR_CONFIRMED;
+ else
+ spc_state = SCTP_ADDR_AVAILABLE;
transport->state = SCTP_ACTIVE;
- spc_state = SCTP_ADDR_AVAILABLE;
break;
case SCTP_TRANSPORT_DOWN:
@@ -725,7 +733,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
default:
return;
- };
+ }
/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
* user.
@@ -1046,6 +1054,9 @@ void sctp_assoc_update(struct sctp_association *asoc,
trans = list_entry(pos, struct sctp_transport, transports);
if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr))
sctp_assoc_del_peer(asoc, &trans->ipaddr);
+
+ if (asoc->state >= SCTP_STATE_ESTABLISHED)
+ sctp_transport_reset(trans);
}
/* If the case is A (association restart), use
@@ -1063,6 +1074,18 @@ void sctp_assoc_update(struct sctp_association *asoc,
*/
sctp_ssnmap_clear(asoc->ssnmap);
+ /* Flush the ULP reassembly and ordered queue.
+ * Any data there will now be stale and will
+ * cause problems.
+ */
+ sctp_ulpq_flush(&asoc->ulpq);
+
+ /* reset the overall association error count so
+ * that the restarted association doesn't get torn
+ * down on the next retransmission timer.
+ */
+ asoc->overall_error_count = 0;
+
} else {
/* Add any peer addresses from the new association. */
list_for_each(pos, &new->peer.transport_addr_list) {
@@ -1080,6 +1103,13 @@ void sctp_assoc_update(struct sctp_association *asoc,
asoc->ssnmap = new->ssnmap;
new->ssnmap = NULL;
}
+
+ if (!asoc->assoc_id) {
+ /* get a new association id since we don't have one
+ * yet.
+ */
+ sctp_assoc_set_id(asoc, GFP_ATOMIC);
+ }
}
}
@@ -1352,3 +1382,25 @@ out:
sctp_read_unlock(&asoc->base.addr_lock);
return found;
}
+
+/* Set an association id for a given association */
+int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
+{
+ int assoc_id;
+ int error = 0;
+retry:
+ if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp)))
+ return -ENOMEM;
+
+ spin_lock_bh(&sctp_assocs_id_lock);
+ error = idr_get_new_above(&sctp_assocs_id, (void *)asoc,
+ 1, &assoc_id);
+ spin_unlock_bh(&sctp_assocs_id_lock);
+ if (error == -EAGAIN)
+ goto retry;
+ else if (error)
+ return error;
+
+ asoc->assoc_id = (sctp_assoc_t) assoc_id;
+ return error;
+}
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 5f5ab28977c9..e8c0f7435d7f 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -93,8 +93,9 @@ const char *sctp_cname(const sctp_subtype_t cid)
return "FWD_TSN";
default:
- return "unknown chunk";
- };
+ break;
+ }
+
return "unknown chunk";
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 71db66873695..885109fb3dda 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -79,14 +79,10 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
/* Calculate the SCTP checksum of an SCTP packet. */
static inline int sctp_rcv_checksum(struct sk_buff *skb)
{
- struct sctphdr *sh;
- __u32 cmp, val;
struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- sh = (struct sctphdr *) skb->h.raw;
- cmp = ntohl(sh->checksum);
-
- val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
+ struct sctphdr *sh = sctp_hdr(skb);
+ __u32 cmp = ntohl(sh->checksum);
+ __u32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
for (; list; list = list->next)
val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
@@ -138,14 +134,13 @@ int sctp_rcv(struct sk_buff *skb)
if (skb_linearize(skb))
goto discard_it;
- sh = (struct sctphdr *) skb->h.raw;
+ sh = sctp_hdr(skb);
/* Pull up the IP and SCTP headers. */
- __skb_pull(skb, skb->h.raw - skb->data);
+ __skb_pull(skb, skb_transport_offset(skb));
if (skb->len < sizeof(struct sctphdr))
goto discard_it;
- if ((skb->ip_summed != CHECKSUM_UNNECESSARY) &&
- (sctp_rcv_checksum(skb) < 0))
+ if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0)
goto discard_it;
skb_pull(skb, sizeof(struct sctphdr));
@@ -154,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb)
if (skb->len < sizeof(struct sctp_chunkhdr))
goto discard_it;
- family = ipver2af(skb->nh.iph->version);
+ family = ipver2af(ip_hdr(skb)->version);
af = sctp_get_af_specific(family);
if (unlikely(!af))
goto discard_it;
@@ -510,30 +505,30 @@ void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
void sctp_v4_err(struct sk_buff *skb, __u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
- struct sctphdr *sh = (struct sctphdr *)(skb->data + (iph->ihl <<2));
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int ihlen = iph->ihl * 4;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct sock *sk;
struct sctp_association *asoc = NULL;
struct sctp_transport *transport;
struct inet_sock *inet;
- char *saveip, *savesctp;
+ sk_buff_data_t saveip, savesctp;
int err;
- if (skb->len < ((iph->ihl << 2) + 8)) {
+ if (skb->len < ihlen + 8) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
}
/* Fix up skb to look at the embedded net header. */
- saveip = skb->nh.raw;
- savesctp = skb->h.raw;
- skb->nh.iph = iph;
- skb->h.raw = (char *)sh;
- sk = sctp_err_lookup(AF_INET, skb, sh, &asoc, &transport);
- /* Put back, the original pointers. */
- skb->nh.raw = saveip;
- skb->h.raw = savesctp;
+ saveip = skb->network_header;
+ savesctp = skb->transport_header;
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, ihlen);
+ sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
+ /* Put back, the original values. */
+ skb->network_header = saveip;
+ skb->transport_header = savesctp;
if (!sk) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
@@ -616,7 +611,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
break;
ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
- if (ch_end > skb->tail)
+ if (ch_end > skb_tail_pointer(skb))
break;
/* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
@@ -648,7 +643,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
}
ch = (sctp_chunkhdr_t *) ch_end;
- } while (ch_end < skb->tail);
+ } while (ch_end < skb_tail_pointer(skb));
return 0;
@@ -905,7 +900,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
struct sctp_association *asoc;
union sctp_addr addr;
union sctp_addr *paddr = &addr;
- struct sctphdr *sh = (struct sctphdr *) skb->h.raw;
+ struct sctphdr *sh = sctp_hdr(skb);
sctp_chunkhdr_t *ch;
union sctp_params params;
sctp_init_chunk_t *init;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index c30629e17781..88aa22407549 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -159,16 +159,16 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
* the skb->tail.
*/
if (unlikely(skb_is_nonlinear(chunk->skb))) {
- if (chunk->chunk_end > chunk->skb->tail)
- chunk->chunk_end = chunk->skb->tail;
+ if (chunk->chunk_end > skb_tail_pointer(chunk->skb))
+ chunk->chunk_end = skb_tail_pointer(chunk->skb);
}
skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
- if (chunk->chunk_end < chunk->skb->tail) {
+ if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) {
/* This is not a singleton */
chunk->singleton = 0;
- } else if (chunk->chunk_end > chunk->skb->tail) {
+ } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
/* RFC 2960, Section 6.10 Bundling
*
* Partial chunks MUST NOT be placed in an SCTP packet.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0b9c49b3a100..84cd53635fe8 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -122,26 +122,24 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
int type, int code, int offset, __be32 info)
{
struct inet6_dev *idev;
- struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
- struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
struct sock *sk;
struct sctp_association *asoc;
struct sctp_transport *transport;
struct ipv6_pinfo *np;
- char *saveip, *savesctp;
+ sk_buff_data_t saveip, savesctp;
int err;
idev = in6_dev_get(skb->dev);
/* Fix up skb to look at the embedded net header. */
- saveip = skb->nh.raw;
- savesctp = skb->h.raw;
- skb->nh.ipv6h = iph;
- skb->h.raw = (char *)sh;
- sk = sctp_err_lookup(AF_INET6, skb, sh, &asoc, &transport);
+ saveip = skb->network_header;
+ savesctp = skb->transport_header;
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, offset);
+ sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
/* Put back, the original pointers. */
- skb->nh.raw = saveip;
- skb->h.raw = savesctp;
+ skb->network_header = saveip;
+ skb->transport_header = savesctp;
if (!sk) {
ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
goto out;
@@ -391,13 +389,13 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
addr->v6.sin6_flowinfo = 0; /* FIXME */
addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif;
- sh = (struct sctphdr *) skb->h.raw;
+ sh = sctp_hdr(skb);
if (is_saddr) {
*port = sh->source;
- from = &skb->nh.ipv6h->saddr;
+ from = &ipv6_hdr(skb)->saddr;
} else {
*port = sh->dest;
- from = &skb->nh.ipv6h->daddr;
+ from = &ipv6_hdr(skb)->daddr;
}
ipv6_addr_copy(&addr->v6.sin6_addr, from);
}
@@ -606,7 +604,7 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
default:
retval = SCTP_SCOPE_GLOBAL;
break;
- };
+ }
return retval;
}
@@ -699,7 +697,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
/* Was this packet marked by Explicit Congestion Notification? */
static int sctp_v6_is_ce(const struct sk_buff *skb)
{
- return *((__u32 *)(skb->nh.ipv6h)) & htonl(1<<20);
+ return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20);
}
/* Dump the v6 addr to the seq file. */
@@ -766,19 +764,19 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
if (msgname) {
sctp_inet6_msgname(msgname, addr_len);
sin6 = (struct sockaddr_in6 *)msgname;
- sh = (struct sctphdr *)skb->h.raw;
+ sh = sctp_hdr(skb);
sin6->sin6_port = sh->source;
/* Map ipv4 address into v4-mapped-on-v6 address. */
if (sctp_sk(skb->sk)->v4mapped &&
- skb->nh.iph->version == 4) {
+ ip_hdr(skb)->version == 4) {
sctp_v4_map_v6((union sctp_addr *)sin6);
- sin6->sin6_addr.s6_addr32[3] = skb->nh.iph->saddr;
+ sin6->sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
return;
}
/* Otherwise, just copy the v6 address. */
- ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
struct sctp_ulpevent *ev = sctp_skb2event(skb);
sin6->sin6_scope_id = ev->iif;
@@ -994,45 +992,52 @@ static struct sctp_pf sctp_pf_inet6_specific = {
.af = &sctp_ipv6_specific,
};
-/* Initialize IPv6 support and register with inet6 stack. */
+/* Initialize IPv6 support and register with socket layer. */
int sctp_v6_init(void)
{
- int rc = proto_register(&sctpv6_prot, 1);
+ int rc;
+
+ /* Register the SCTP specific PF_INET6 functions. */
+ sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6);
+ /* Register the SCTP specific AF_INET6 functions. */
+ sctp_register_af(&sctp_ipv6_specific);
+
+ rc = proto_register(&sctpv6_prot, 1);
if (rc)
- goto out;
- /* Register inet6 protocol. */
- rc = -EAGAIN;
- if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0)
- goto out_unregister_sctp_proto;
+ return rc;
/* Add SCTPv6(UDP and TCP style) to inetsw6 linked list. */
inet6_register_protosw(&sctpv6_seqpacket_protosw);
inet6_register_protosw(&sctpv6_stream_protosw);
- /* Register the SCTP specific PF_INET6 functions. */
- sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6);
-
- /* Register the SCTP specific AF_INET6 functions. */
- sctp_register_af(&sctp_ipv6_specific);
+ return 0;
+}
+/* Register with inet6 layer. */
+int sctp_v6_add_protocol(void)
+{
/* Register notifier for inet6 address additions/deletions. */
register_inet6addr_notifier(&sctp_inet6addr_notifier);
- rc = 0;
-out:
- return rc;
-out_unregister_sctp_proto:
- proto_unregister(&sctpv6_prot);
- goto out;
+
+ if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0)
+ return -EAGAIN;
+
+ return 0;
}
/* IPv6 specific exit support. */
void sctp_v6_exit(void)
{
- list_del(&sctp_ipv6_specific.list);
- inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP);
inet6_unregister_protosw(&sctpv6_seqpacket_protosw);
inet6_unregister_protosw(&sctpv6_stream_protosw);
- unregister_inet6addr_notifier(&sctp_inet6addr_notifier);
proto_unregister(&sctpv6_prot);
+ list_del(&sctp_ipv6_specific.list);
+}
+
+/* Unregister with inet6 layer. */
+void sctp_v6_del_protocol(void)
+{
+ inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP);
+ unregister_inet6addr_notifier(&sctp_inet6addr_notifier);
}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f875fc3ced54..d85543def754 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -176,7 +176,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
case SCTP_XMIT_OK:
case SCTP_XMIT_NAGLE_DELAY:
break;
- };
+ }
return retval;
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 41abfd17627e..992f361084b7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -338,7 +338,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
q->empty = 0;
break;
- };
+ }
} else {
list_add_tail(&chunk->list, &q->control_chunk_list);
SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
@@ -630,7 +630,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
/* Retrieve a new chunk to bundle. */
lchunk = sctp_list_dequeue(lqueue);
break;
- };
+ }
/* If we are here due to a retransmit timeout or a fast
* retransmit and if there are any chunks left in the retransmit
@@ -779,7 +779,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
default:
/* We built a chunk with an illegal type! */
BUG();
- };
+ }
}
/* Is it OK to send data chunks? */
@@ -1397,7 +1397,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
SCTP_DEBUG_PRINTK("ACKed: %08x", tsn);
dbg_prt_state = 0;
dbg_ack_tsn = tsn;
- };
+ }
dbg_last_ack_tsn = tsn;
#endif /* SCTP_DEBUG */
@@ -1452,7 +1452,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
SCTP_DEBUG_PRINTK("KEPT: %08x",tsn);
dbg_prt_state = 1;
dbg_kept_tsn = tsn;
- };
+ }
dbg_last_kept_tsn = tsn;
#endif /* SCTP_DEBUG */
@@ -1476,7 +1476,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
} else {
SCTP_DEBUG_PRINTK("\n");
}
- };
+ }
#endif /* SCTP_DEBUG */
if (transport) {
if (bytes_acked) {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e17a823ca90f..34bab36637ac 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -170,7 +170,7 @@ static void sctp_get_local_addr_list(void)
struct sctp_af *af;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
__list_for_each(pos, &sctp_address_families) {
af = list_entry(pos, struct sctp_af, list);
af->copy_addrlist(&sctp_local_addr_list, dev);
@@ -235,13 +235,13 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
port = &addr->v4.sin_port;
addr->v4.sin_family = AF_INET;
- sh = (struct sctphdr *) skb->h.raw;
+ sh = sctp_hdr(skb);
if (is_saddr) {
*port = sh->source;
- from = &skb->nh.iph->saddr;
+ from = &ip_hdr(skb)->saddr;
} else {
*port = sh->dest;
- from = &skb->nh.iph->daddr;
+ from = &ip_hdr(skb)->daddr;
}
memcpy(&addr->v4.sin_addr.s_addr, from, sizeof(struct in_addr));
}
@@ -530,7 +530,7 @@ static int sctp_v4_skb_iif(const struct sk_buff *skb)
/* Was this packet marked by Explicit Congestion Notification? */
static int sctp_v4_is_ce(const struct sk_buff *skb)
{
- return INET_ECN_is_ce(skb->nh.iph->tos);
+ return INET_ECN_is_ce(ip_hdr(skb)->tos);
}
/* Create and initialize a new sk for the socket returned by accept(). */
@@ -731,15 +731,13 @@ static void sctp_inet_event_msgname(struct sctp_ulpevent *event, char *msgname,
/* Initialize and copy out a msgname from an inbound skb. */
static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
{
- struct sctphdr *sh;
- struct sockaddr_in *sin;
-
if (msgname) {
+ struct sctphdr *sh = sctp_hdr(skb);
+ struct sockaddr_in *sin = (struct sockaddr_in *)msgname;
+
sctp_inet_msgname(msgname, len);
- sin = (struct sockaddr_in *)msgname;
- sh = (struct sctphdr *)skb->h.raw;
sin->sin_port = sh->source;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
}
}
@@ -977,28 +975,14 @@ SCTP_STATIC __init int sctp_init(void)
if (!sctp_sanity_check())
goto out;
- status = proto_register(&sctp_prot, 1);
- if (status)
- goto out;
-
- /* Add SCTP to inet_protos hash table. */
- status = -EAGAIN;
- if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0)
- goto err_add_protocol;
-
- /* Add SCTP(TCP and UDP style) to inetsw linked list. */
- inet_register_protosw(&sctp_seqpacket_protosw);
- inet_register_protosw(&sctp_stream_protosw);
-
- /* Allocate a cache pools. */
+ /* Allocate bind_bucket and chunk caches. */
status = -ENOBUFS;
sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket",
sizeof(struct sctp_bind_bucket),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
-
if (!sctp_bucket_cachep)
- goto err_bucket_cachep;
+ goto out;
sctp_chunk_cachep = kmem_cache_create("sctp_chunk",
sizeof(struct sctp_chunk),
@@ -1044,7 +1028,7 @@ SCTP_STATIC __init int sctp_init(void)
sctp_cookie_preserve_enable = 1;
/* Max.Burst - 4 */
- sctp_max_burst = SCTP_MAX_BURST;
+ sctp_max_burst = SCTP_DEFAULT_MAX_BURST;
/* Association.Max.Retrans - 10 attempts
* Path.Max.Retrans - 5 attempts (per destination address)
@@ -1155,6 +1139,14 @@ SCTP_STATIC __init int sctp_init(void)
INIT_LIST_HEAD(&sctp_address_families);
sctp_register_af(&sctp_ipv4_specific);
+ status = proto_register(&sctp_prot, 1);
+ if (status)
+ goto err_proto_register;
+
+ /* Register SCTP(UDP and TCP style) with socket layer. */
+ inet_register_protosw(&sctp_seqpacket_protosw);
+ inet_register_protosw(&sctp_stream_protosw);
+
status = sctp_v6_init();
if (status)
goto err_v6_init;
@@ -1168,19 +1160,39 @@ SCTP_STATIC __init int sctp_init(void)
/* Initialize the local address list. */
INIT_LIST_HEAD(&sctp_local_addr_list);
-
sctp_get_local_addr_list();
/* Register notifier for inet address additions/deletions. */
register_inetaddr_notifier(&sctp_inetaddr_notifier);
+ /* Register SCTP with inet layer. */
+ if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) {
+ status = -EAGAIN;
+ goto err_add_protocol;
+ }
+
+ /* Register SCTP with inet6 layer. */
+ status = sctp_v6_add_protocol();
+ if (status)
+ goto err_v6_add_protocol;
+
__unsafe(THIS_MODULE);
status = 0;
out:
return status;
+err_v6_add_protocol:
+ inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
+ unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+err_add_protocol:
+ sctp_free_local_addr_list();
+ sock_release(sctp_ctl_socket);
err_ctl_sock_init:
sctp_v6_exit();
err_v6_init:
+ inet_unregister_protosw(&sctp_stream_protosw);
+ inet_unregister_protosw(&sctp_seqpacket_protosw);
+ proto_unregister(&sctp_prot);
+err_proto_register:
sctp_sysctl_unregister();
list_del(&sctp_ipv4_specific.list);
free_pages((unsigned long)sctp_port_hashtable,
@@ -1194,19 +1206,13 @@ err_ehash_alloc:
sizeof(struct sctp_hashbucket)));
err_ahash_alloc:
sctp_dbg_objcnt_exit();
-err_init_proc:
sctp_proc_exit();
+err_init_proc:
cleanup_sctp_mibs();
err_init_mibs:
kmem_cache_destroy(sctp_chunk_cachep);
err_chunk_cachep:
kmem_cache_destroy(sctp_bucket_cachep);
-err_bucket_cachep:
- inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
- inet_unregister_protosw(&sctp_seqpacket_protosw);
- inet_unregister_protosw(&sctp_stream_protosw);
-err_add_protocol:
- proto_unregister(&sctp_prot);
goto out;
}
@@ -1217,8 +1223,9 @@ SCTP_STATIC __exit void sctp_exit(void)
* up all the remaining associations and all that memory.
*/
- /* Unregister notifier for inet address additions/deletions. */
- unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+ /* Unregister with inet6/inet layers. */
+ sctp_v6_del_protocol();
+ inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
/* Free the local address list. */
sctp_free_local_addr_list();
@@ -1226,7 +1233,16 @@ SCTP_STATIC __exit void sctp_exit(void)
/* Free the control endpoint. */
sock_release(sctp_ctl_socket);
+ /* Cleanup v6 initializations. */
sctp_v6_exit();
+
+ /* Unregister with socket layer. */
+ inet_unregister_protosw(&sctp_stream_protosw);
+ inet_unregister_protosw(&sctp_seqpacket_protosw);
+
+ /* Unregister notifier for inet address additions/deletions. */
+ unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+
sctp_sysctl_unregister();
list_del(&sctp_ipv4_specific.list);
@@ -1238,16 +1254,13 @@ SCTP_STATIC __exit void sctp_exit(void)
get_order(sctp_port_hashsize *
sizeof(struct sctp_bind_hashbucket)));
- kmem_cache_destroy(sctp_chunk_cachep);
- kmem_cache_destroy(sctp_bucket_cachep);
-
sctp_dbg_objcnt_exit();
sctp_proc_exit();
cleanup_sctp_mibs();
- inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
- inet_unregister_protosw(&sctp_seqpacket_protosw);
- inet_unregister_protosw(&sctp_stream_protosw);
+ kmem_cache_destroy(sctp_chunk_cachep);
+ kmem_cache_destroy(sctp_bucket_cachep);
+
proto_unregister(&sctp_prot);
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f7fb29d5a0c7..8d18f570c2e6 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -86,7 +86,7 @@ int sctp_chunk_iif(const struct sctp_chunk *chunk)
struct sctp_af *af;
int iif = 0;
- af = sctp_get_af_specific(ipver2af(chunk->skb->nh.iph->version));
+ af = sctp_get_af_specific(ipver2af(ip_hdr(chunk->skb)->version));
if (af)
iif = af->skb_iif(chunk->skb);
@@ -1143,7 +1143,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
/* Adjust the chunk length field. */
chunk->chunk_hdr->length = htons(chunklen + padlen + len);
- chunk->chunk_end = chunk->skb->tail;
+ chunk->chunk_end = skb_tail_pointer(chunk->skb);
return target;
}
@@ -1168,7 +1168,7 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len,
/* Adjust the chunk length field. */
chunk->chunk_hdr->length =
htons(ntohs(chunk->chunk_hdr->length) + len);
- chunk->chunk_end = chunk->skb->tail;
+ chunk->chunk_end = skb_tail_pointer(chunk->skb);
out:
return err;
@@ -1233,7 +1233,7 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
asoc->temp = 1;
skb = chunk->skb;
/* Create an entry for the source address of the packet. */
- af = sctp_get_af_specific(ipver2af(skb->nh.iph->version));
+ af = sctp_get_af_specific(ipver2af(ip_hdr(skb)->version));
if (unlikely(!af))
goto fail;
af->from_skb(&asoc->c.peer_addr, skb, 1);
@@ -1939,7 +1939,6 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
* association.
*/
if (!asoc->temp) {
- int assoc_id;
int error;
asoc->ssnmap = sctp_ssnmap_new(asoc->c.sinit_max_instreams,
@@ -1947,19 +1946,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
if (!asoc->ssnmap)
goto clean_up;
- retry:
- if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp)))
+ error = sctp_assoc_set_id(asoc, gfp);
+ if (error)
goto clean_up;
- spin_lock_bh(&sctp_assocs_id_lock);
- error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, 1,
- &assoc_id);
- spin_unlock_bh(&sctp_assocs_id_lock);
- if (error == -EAGAIN)
- goto retry;
- else if (error)
- goto clean_up;
-
- asoc->assoc_id = (sctp_assoc_t) assoc_id;
}
/* ADDIP Section 4.1 ASCONF Chunk Procedures
@@ -2077,7 +2066,7 @@ static int sctp_process_param(struct sctp_association *asoc,
default: /* Just ignore anything else. */
break;
- };
+ }
}
break;
@@ -2118,7 +2107,7 @@ static int sctp_process_param(struct sctp_association *asoc,
SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n",
ntohs(param.p->type), asoc);
break;
- };
+ }
return retval;
}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 135567493119..d9fad4f6ffc3 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -464,7 +464,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
struct sctp_ulpevent *event;
event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC,
- (__u16)error, 0, 0,
+ (__u16)error, 0, 0, NULL,
GFP_ATOMIC);
if (event)
@@ -492,8 +492,13 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
/* Cancel any partial delivery in progress. */
sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
- event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
- (__u16)error, 0, 0,
+ if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT)
+ event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
+ (__u16)error, 0, 0, chunk,
+ GFP_ATOMIC);
+ else
+ event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
+ (__u16)error, 0, 0, NULL,
GFP_ATOMIC);
if (event)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
@@ -857,6 +862,33 @@ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error)
sk->sk_err = error;
}
+/* Helper function to generate an association change event.
+ *
+ * Creates an association change ulpevent for @asoc reporting @state, with
+ * an error value of 0, the stream counts taken from
+ * asoc->c.sinit_num_ostreams and asoc->c.sinit_max_instreams, no chunk
+ * (NULL) and GFP_ATOMIC as the allocation flag. When the event is
+ * created it is passed to sctp_ulpq_tail_event() on asoc->ulpq; when
+ * creation fails (NULL) nothing is queued and no error is reported.
+ *
+ * @commands is accepted but not used here.
+ */
+static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
+ struct sctp_association *asoc,
+ u8 state)
+{
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_assoc_change(asoc, 0, state, 0,
+ asoc->c.sinit_num_ostreams,
+ asoc->c.sinit_max_instreams,
+ NULL, GFP_ATOMIC);
+ if (ev)
+ sctp_ulpq_tail_event(&asoc->ulpq, ev);
+}
+
+/* Helper function to generate an adaptation indication event.
+ *
+ * Asks sctp_ulpevent_make_adaptation_indication() for an event on @asoc
+ * (GFP_ATOMIC) and, if one was returned, passes it to
+ * sctp_ulpq_tail_event() on asoc->ulpq. A NULL result is ignored, so no
+ * event is queued and no error is reported in that case.
+ *
+ * @commands is accepted but not used here.
+ */
+static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
+ struct sctp_association *asoc)
+{
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC);
+
+ if (ev)
+ sctp_ulpq_tail_event(&asoc->ulpq, ev);
+}
+
/* These three macros allow us to pull the debugging code out of the
* main flow of sctp_do_sm() to keep attention focused on the real
* functionality there.
@@ -1004,7 +1036,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
status, state, event_type, subtype.chunk);
BUG();
break;
- };
+ }
bail:
return error;
@@ -1480,11 +1512,20 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_SET_SK_ERR:
sctp_cmd_set_sk_err(asoc, cmd->obj.error);
break;
+ case SCTP_CMD_ASSOC_CHANGE:
+ sctp_cmd_assoc_change(commands, asoc,
+ cmd->obj.u8);
+ break;
+ case SCTP_CMD_ADAPTATION_IND:
+ sctp_cmd_adaptation_ind(commands, asoc);
+ break;
+
default:
printk(KERN_WARNING "Impossible command: %u, %p\n",
cmd->verb, cmd->obj.ptr);
break;
- };
+ }
+
if (error)
break;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 70c39eac0581..f02ce3dddb7b 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -186,7 +186,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
* notification is passed to the upper layer.
*/
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
- 0, 0, 0, GFP_ATOMIC);
+ 0, 0, 0, NULL, GFP_ATOMIC);
if (ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ev));
@@ -629,7 +629,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
case -SCTP_IERROR_BAD_SIG:
default:
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
- };
+ }
}
@@ -661,7 +661,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0,
new_asoc->c.sinit_num_ostreams,
new_asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
+ NULL, GFP_ATOMIC);
if (!ev)
goto nomem_ev;
@@ -790,7 +790,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP,
0, asoc->c.sinit_num_ostreams,
asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
+ NULL, GFP_ATOMIC);
if (!ev)
goto nomem;
@@ -1195,7 +1195,7 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc,
new_asoc->c.my_ttag = asoc->c.my_vtag;
new_asoc->c.peer_ttag = asoc->c.peer_vtag;
break;
- };
+ }
/* Other parameters for the endpoint SHOULD be copied from the
* existing parameters of the association (e.g. number of
@@ -1625,7 +1625,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
new_asoc->c.sinit_num_ostreams,
new_asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
+ NULL, GFP_ATOMIC);
if (!ev)
goto nomem_ev;
@@ -1656,7 +1656,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
struct sctp_association *new_asoc)
{
sctp_init_chunk_t *peer_init;
- struct sctp_ulpevent *ev;
struct sctp_chunk *repl;
/* new_asoc is a brand-new association, so these are not yet
@@ -1687,34 +1686,28 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
* D) IMPLEMENTATION NOTE: An implementation may choose to
* send the Communication Up notification to the SCTP user
* upon reception of a valid COOKIE ECHO chunk.
+ *
+ * Sadly, this needs to be implemented as a side-effect, because
+ * we are not guaranteed to have set the association id of the real
+ * association and so these notifications need to be delayed until
+ * the association id is allocated.
*/
- ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0,
- new_asoc->c.sinit_num_ostreams,
- new_asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
- if (!ev)
- goto nomem_ev;
- sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_CHANGE, SCTP_U8(SCTP_COMM_UP));
/* Sockets API Draft Section 5.3.1.6
* When a peer sends a Adaptation Layer Indication parameter , SCTP
* delivers this notification to inform the application that of the
* peers requested adaptation layer.
+ *
+ * This also needs to be done as a side effect for the same reason as
+ * above.
*/
- if (asoc->peer.adaptation_ind) {
- ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC);
- if (!ev)
- goto nomem_ev;
-
- sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
- SCTP_ULPEVENT(ev));
- }
+ if (asoc->peer.adaptation_ind)
+ sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL());
return SCTP_DISPOSITION_CONSUME;
-nomem_ev:
- sctp_chunk_free(repl);
nomem:
return SCTP_DISPOSITION_NOMEM;
}
@@ -1786,7 +1779,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
SCTP_COMM_UP, 0,
asoc->c.sinit_num_ostreams,
asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
+ NULL, GFP_ATOMIC);
if (!ev)
goto nomem;
@@ -1904,7 +1897,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
case -SCTP_IERROR_BAD_SIG:
default:
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
- };
+ }
}
/* Compare the tie_tag in cookie with the verification tag of
@@ -1936,7 +1929,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
default: /* Discard packet for all others. */
retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
break;
- };
+ }
/* Delete the tempory new association. */
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
@@ -3035,7 +3028,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
* notification is passed to the upper layer.
*/
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
- 0, 0, 0, GFP_ATOMIC);
+ 0, 0, 0, NULL, GFP_ATOMIC);
if (!ev)
goto nomem;
@@ -3115,7 +3108,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
break;
ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
- if (ch_end > skb->tail)
+ if (ch_end > skb_tail_pointer(skb))
break;
if (SCTP_CID_SHUTDOWN_ACK == ch->type)
@@ -3130,7 +3123,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
ch = (sctp_chunkhdr_t *) ch_end;
- } while (ch_end < skb->tail);
+ } while (ch_end < skb_tail_pointer(skb));
if (ootb_shut_ack)
sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
@@ -4342,8 +4335,24 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
void *arg,
sctp_cmd_seq_t *commands)
{
- return sctp_sf_heartbeat(ep, asoc, type, (struct sctp_transport *)arg,
- commands);
+ if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type,
+ (struct sctp_transport *)arg, commands))
+ return SCTP_DISPOSITION_NOMEM;
+
+ /*
+ * RFC 2960 (bis), section 8.3
+ *
+ * D) Request an on-demand HEARTBEAT on a specific destination
+ * transport address of a given association.
+ *
+ * The endpoint should increment the respective error counter of
+ * the destination transport address each time a HEARTBEAT is sent
+ * to that address and not acknowledged within one RTO.
+ *
+ */
+ sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET,
+ SCTP_TRANSPORT(arg));
+ return SCTP_DISPOSITION_CONSUME;
}
/*
@@ -4800,7 +4809,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
default:
BUG();
break;
- };
+ }
if (!reply)
goto nomem;
@@ -5270,7 +5279,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
chunk->ecn_ce_done = 1;
af = sctp_get_af_specific(
- ipver2af(chunk->skb->nh.iph->version));
+ ipver2af(ip_hdr(chunk->skb)->version));
if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) {
/* Do real work as sideffect. */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 5e54b17377f4..523071c7902f 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -101,7 +101,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
default:
/* Yikes! We got an illegal event type. */
return &bug;
- };
+ }
}
#define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 536298c2eda2..9f1a908776de 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -627,6 +627,12 @@ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
retval = -EINVAL;
goto err_bindx_rem;
}
+
+ if (!af->addr_valid(sa_addr, sp, NULL)) {
+ retval = -EADDRNOTAVAIL;
+ goto err_bindx_rem;
+ }
+
if (sa_addr->v4.sin_port != htons(bp->port)) {
retval = -EINVAL;
goto err_bindx_rem;
@@ -935,7 +941,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
default:
err = -EINVAL;
break;
- };
+ }
out:
kfree(kaddrs);
@@ -966,6 +972,7 @@ static int __sctp_connect(struct sock* sk,
int walk_size = 0;
union sctp_addr *sa_addr;
void *addr_buf;
+ unsigned short port;
sp = sctp_sk(sk);
ep = sp->ep;
@@ -986,6 +993,7 @@ static int __sctp_connect(struct sock* sk,
while (walk_size < addrs_size) {
sa_addr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(sa_addr->sa.sa_family);
+ port = ntohs(sa_addr->v4.sin_port);
/* If the address family is not supported or if this address
* causes the address buffer to overflow return EINVAL.
@@ -999,6 +1007,12 @@ static int __sctp_connect(struct sock* sk,
if (err)
goto out_free;
+ /* Make sure the destination port is correctly set
+ * in all addresses.
+ */
+ if (asoc && asoc->peer.port && asoc->peer.port != port)
+ goto out_free;
+
memcpy(&to, sa_addr, af->sockaddr_len);
/* Check if there already is a matching association on the
@@ -2033,6 +2047,10 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
* SPP_HB_DEMAND - Request a user initiated heartbeat
* to be made immediately.
*
+ * SPP_HB_TIME_IS_ZERO - Specifies that the time for
+ * heartbeat delay is to be set to the value of 0
+ * milliseconds.
+ *
* SPP_PMTUD_ENABLE - This field will enable PMTU
* discovery upon the specified address. Note that
* if the address feild is empty then all addresses
@@ -2075,13 +2093,30 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
return error;
}
- if (params->spp_hbinterval) {
- if (trans) {
- trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
- } else if (asoc) {
- asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
- } else {
- sp->hbinterval = params->spp_hbinterval;
+ /* Note that unless the spp_flag is set to SPP_HB_ENABLE the value of
+ * this field is ignored. Note also that a value of zero indicates
+ * the current setting should be left unchanged.
+ */
+ if (params->spp_flags & SPP_HB_ENABLE) {
+
+ /* Re-zero the interval if the SPP_HB_TIME_IS_ZERO is
+ * set. This lets us use 0 value when this flag
+ * is set.
+ */
+ if (params->spp_flags & SPP_HB_TIME_IS_ZERO)
+ params->spp_hbinterval = 0;
+
+ if (params->spp_hbinterval ||
+ (params->spp_flags & SPP_HB_TIME_IS_ZERO)) {
+ if (trans) {
+ trans->hbinterval =
+ msecs_to_jiffies(params->spp_hbinterval);
+ } else if (asoc) {
+ asoc->hbinterval =
+ msecs_to_jiffies(params->spp_hbinterval);
+ } else {
+ sp->hbinterval = params->spp_hbinterval;
+ }
}
}
@@ -2098,7 +2133,12 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
}
}
- if (params->spp_pathmtu) {
+ /* When Path MTU discovery is disabled the value specified here will
+ * be the "fixed" path mtu (i.e. the value of the spp_flags field must
+ * include the flag SPP_PMTUD_DISABLE for this field to have any
+ * effect).
+ */
+ if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
if (trans) {
trans->pathmtu = params->spp_pathmtu;
sctp_assoc_sync_pmtu(asoc);
@@ -2129,7 +2169,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
}
}
- if (params->spp_sackdelay) {
+ /* Note that unless the spp_flag is set to SPP_SACKDELAY_ENABLE the
+ * value of this field is ignored. Note also that a value of zero
+ * indicates the current setting should be left unchanged.
+ */
+ if ((params->spp_flags & SPP_SACKDELAY_ENABLE) && params->spp_sackdelay) {
if (trans) {
trans->sackdelay =
msecs_to_jiffies(params->spp_sackdelay);
@@ -2157,7 +2201,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
}
}
- if (params->spp_pathmaxrxt) {
+ /* Note that unless the spp_flag is set to SPP_PMTUD_ENABLE the value
+ * of this field is ignored. Note also that a value of zero
+ * indicates the current setting should be left unchanged.
+ */
+ if ((params->spp_flags & SPP_PMTUD_ENABLE) && params->spp_pathmaxrxt) {
if (trans) {
trans->pathmaxrxt = params->spp_pathmaxrxt;
} else if (asoc) {
@@ -2249,7 +2297,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
return 0;
}
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
*
* This options will get or set the delayed ack timer. The time is set
* in milliseconds. If the assoc_id is 0, then this sets or gets the
@@ -2786,6 +2834,102 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
return 0;
}
+/*
+ * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ *
+ * This option will at a minimum specify if the implementation is doing
+ * fragmented interleave. Fragmented interleave, for a one to many
+ * socket, is when subsequent calls to receive a message may return
+ * parts of messages from different associations. Some implementations
+ * may allow you to turn this value on or off. If so, when turned off,
+ * no fragment interleave will occur (which will cause a head of line
+ * blocking amongst multiple associations sharing the same one to many
+ * socket). When this option is turned on, then each receive call may
+ * come from a different association (thus the user must receive data
+ * with the extended calls (e.g. sctp_recvmsg) to keep track of which
+ * association each receive belongs to).
+ *
+ * This option takes a boolean value. A non-zero value indicates that
+ * fragmented interleave is on. A value of zero indicates that
+ * fragmented interleave is off.
+ *
+ * Note that it is important that an implementation that allows this
+ * option to be turned on, have it off by default. Otherwise an unaware
+ * application using the one to many model may become confused and act
+ * incorrectly.
+ *
+ * Implementation: returns -EINVAL unless optlen is exactly sizeof(int)
+ * and -EFAULT if the value cannot be read from optval. Otherwise the
+ * value is normalized to 0 or 1, stored in sctp_sk(sk)->frag_interleave,
+ * and 0 is returned.
+ */
+static int sctp_setsockopt_fragment_interleave(struct sock *sk,
+ char __user *optval,
+ int optlen)
+{
+ int val;
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
+
+ sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+
+ return 0;
+}
+
+/*
+ * 7.1.25. Set or Get the sctp partial delivery point
+ * (SCTP_PARTIAL_DELIVERY_POINT)
+ * This option will set or get the SCTP partial delivery point. This
+ * point is the size of a message where the partial delivery API will be
+ * invoked to help free up rwnd space for the peer. Setting this to a
+ * lower value will cause partial deliveries to happen more often. The
+ * call's argument is an integer that sets or gets the partial delivery
+ * point.
+ *
+ * Implementation: returns -EINVAL unless optlen is exactly sizeof(u32)
+ * and -EFAULT if the value cannot be read from optval. The value is
+ * stored in sctp_sk(sk)->pd_point as given; no range check is applied
+ * here. Note that the user pointer is cast to (int __user *) although
+ * the local it is read into is a u32.
+ */
+static int sctp_setsockopt_partial_delivery_point(struct sock *sk,
+ char __user *optval,
+ int optlen)
+{
+ u32 val;
+
+ if (optlen != sizeof(u32))
+ return -EINVAL;
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
+
+ sctp_sk(sk)->pd_point = val;
+
+ return 0; /* success; NOTE(review): the original comment questioned this return code */
+}
+
+/*
+ * 7.1.28. Set or Get the maximum burst (SCTP_MAX_BURST)
+ *
+ * This option will allow a user to change the maximum burst of packets
+ * that can be emitted by this association. Note that the default value
+ * is 4, and some implementations may restrict this setting so that it
+ * can only be lowered.
+ *
+ * NOTE: This text doesn't seem right. Do this on a socket basis with
+ * future associations inheriting the socket value.
+ *
+ * Implementation: returns -EINVAL unless optlen is exactly sizeof(int),
+ * -EFAULT if the value cannot be read from optval, and -EINVAL again
+ * for a negative value. Otherwise the value is stored in
+ * sctp_sk(sk)->max_burst and 0 is returned. No upper bound is enforced
+ * here.
+ */
+static int sctp_setsockopt_maxburst(struct sock *sk,
+ char __user *optval,
+ int optlen)
+{
+ int val;
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
+
+ if (val < 0)
+ return -EINVAL;
+
+ sctp_sk(sk)->max_burst = val;
+
+ return 0;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2865,6 +3009,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_DELAYED_ACK_TIME:
retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen);
break;
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen);
+ break;
case SCTP_INITMSG:
retval = sctp_setsockopt_initmsg(sk, optval, optlen);
@@ -2900,11 +3047,16 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_CONTEXT:
retval = sctp_setsockopt_context(sk, optval, optlen);
break;
-
+ case SCTP_FRAGMENT_INTERLEAVE:
+ retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
+ break;
+ case SCTP_MAX_BURST:
+ retval = sctp_setsockopt_maxburst(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
- };
+ }
sctp_release_sock(sk);
@@ -3060,6 +3212,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->default_timetolive = 0;
sp->default_rcv_context = 0;
+ sp->max_burst = sctp_max_burst;
/* Initialize default setup parameters. These parameters
* can be modified with the SCTP_INITMSG socket option or
@@ -3128,8 +3281,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->pf = sctp_get_pf_specific(sk->sk_family);
/* Control variables for partial data delivery. */
- sp->pd_mode = 0;
+ atomic_set(&sp->pd_mode, 0);
skb_queue_head_init(&sp->pd_lobby);
+ sp->frag_interleave = 0;
/* Create a per socket endpoint structure. Even if we
* change the data structure relationships, this may still
@@ -3636,7 +3790,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
return 0;
}
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
*
* This options will get or set the delayed ack timer. The time is set
* in milliseconds. If the assoc_id is 0, then this sets or gets the
@@ -3841,7 +3995,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
memcpy(&temp, &from->ipaddr, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
- if(space_left < addrlen)
+ if (space_left < addrlen)
return -ENOMEM;
if (copy_to_user(to, &temp, addrlen))
return -EFAULT;
@@ -3930,8 +4084,9 @@ done:
/* Helper function that copies local addresses to user and returns the number
* of addresses copied.
*/
-static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs,
- void __user *to)
+static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
+ int max_addrs, void *to,
+ int *bytes_copied)
{
struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
@@ -3948,10 +4103,10 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
&temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if (copy_to_user(to, &temp, addrlen))
- return -EFAULT;
+ memcpy(to, &temp, addrlen);
to += addrlen;
+ *bytes_copied += addrlen;
cnt ++;
if (cnt >= max_addrs) break;
}
@@ -3959,8 +4114,8 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
return cnt;
}
-static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
- void __user **to, size_t space_left)
+static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
+ size_t space_left, int *bytes_copied)
{
struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
@@ -3977,14 +4132,14 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
&temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if(space_left<addrlen)
+ if (space_left < addrlen)
return -ENOMEM;
- if (copy_to_user(*to, &temp, addrlen))
- return -EFAULT;
+ memcpy(to, &temp, addrlen);
- *to += addrlen;
+ to += addrlen;
cnt ++;
space_left -= addrlen;
+ bytes_copied += addrlen;
}
return cnt;
@@ -4008,6 +4163,8 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
int addrlen;
rwlock_t *addr_lock;
int err = 0;
+ void *addrs;
+ int bytes_copied = 0;
if (len != sizeof(struct sctp_getaddrs_old))
return -EINVAL;
@@ -4035,6 +4192,15 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
to = getaddrs.addrs;
+ /* Allocate space for a local instance of packed array to hold all
+ * the data. We store addresses here first and then write them
+ * to the user in one shot.
+ */
+ addrs = kmalloc(sizeof(union sctp_addr) * getaddrs.addr_num,
+ GFP_KERNEL);
+ if (!addrs)
+ return -ENOMEM;
+
sctp_read_lock(addr_lock);
/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
@@ -4044,13 +4210,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
addr = list_entry(bp->address_list.next,
struct sctp_sockaddr_entry, list);
if (sctp_is_any(&addr->a)) {
- cnt = sctp_copy_laddrs_to_user_old(sk, bp->port,
- getaddrs.addr_num,
- to);
- if (cnt < 0) {
- err = cnt;
- goto unlock;
- }
+ cnt = sctp_copy_laddrs_old(sk, bp->port,
+ getaddrs.addr_num,
+ addrs, &bytes_copied);
goto copy_getaddrs;
}
}
@@ -4060,22 +4222,29 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if (copy_to_user(to, &temp, addrlen)) {
- err = -EFAULT;
- goto unlock;
- }
+ memcpy(addrs, &temp, addrlen);
to += addrlen;
+ bytes_copied += addrlen;
cnt ++;
if (cnt >= getaddrs.addr_num) break;
}
copy_getaddrs:
+ sctp_read_unlock(addr_lock);
+
+ /* copy the entire address list into the user provided space */
+ if (copy_to_user(to, addrs, bytes_copied)) {
+ err = -EFAULT;
+ goto error;
+ }
+
+ /* copy the leading structure back to user */
getaddrs.addr_num = cnt;
if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old)))
err = -EFAULT;
-unlock:
- sctp_read_unlock(addr_lock);
+error:
+ kfree(addrs);
return err;
}
@@ -4095,7 +4264,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
rwlock_t *addr_lock;
int err = 0;
size_t space_left;
- int bytes_copied;
+ int bytes_copied = 0;
+ void *addrs;
if (len <= sizeof(struct sctp_getaddrs))
return -EINVAL;
@@ -4123,6 +4293,9 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
to = optval + offsetof(struct sctp_getaddrs,addrs);
space_left = len - sizeof(struct sctp_getaddrs) -
offsetof(struct sctp_getaddrs,addrs);
+ addrs = kmalloc(space_left, GFP_KERNEL);
+ if (!addrs)
+ return -ENOMEM;
sctp_read_lock(addr_lock);
@@ -4133,11 +4306,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
addr = list_entry(bp->address_list.next,
struct sctp_sockaddr_entry, list);
if (sctp_is_any(&addr->a)) {
- cnt = sctp_copy_laddrs_to_user(sk, bp->port,
- &to, space_left);
+ cnt = sctp_copy_laddrs(sk, bp->port, addrs,
+ space_left, &bytes_copied);
if (cnt < 0) {
err = cnt;
- goto unlock;
+ goto error;
}
goto copy_getaddrs;
}
@@ -4148,26 +4321,31 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if(space_left < addrlen)
- return -ENOMEM; /*fixme: right error?*/
- if (copy_to_user(to, &temp, addrlen)) {
- err = -EFAULT;
- goto unlock;
+ if (space_left < addrlen) {
+ err = -ENOMEM; /*fixme: right error?*/
+ goto error;
}
+ memcpy(addrs, &temp, addrlen);
to += addrlen;
+ bytes_copied += addrlen;
cnt ++;
space_left -= addrlen;
}
copy_getaddrs:
+ sctp_read_unlock(addr_lock);
+
+ if (copy_to_user(to, addrs, bytes_copied)) {
+ err = -EFAULT;
+ goto error;
+ }
if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num))
return -EFAULT;
- bytes_copied = ((char __user *)to) - optval;
if (put_user(bytes_copied, optlen))
return -EFAULT;
-unlock:
- sctp_read_unlock(addr_lock);
+error:
+ kfree(addrs);
return err;
}
@@ -4530,6 +4708,77 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
return 0;
}
+/*
+ * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ * (chapter and verse is quoted at sctp_setsockopt_fragment_interleave())
+ *
+ * Copies the socket's frag_interleave value to optval as an int and
+ * writes sizeof(int) to optlen.  Returns -EINVAL when len is smaller
+ * than sizeof(int), -EFAULT when either user-space write fails, and 0
+ * otherwise.
+ */
+static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len,
+					       char __user *optval, int __user *optlen)
+{
+	int interleave;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	/* Only an int is handed back, whatever size the caller offered. */
+	len = sizeof(int);
+	interleave = sctp_sk(sk)->frag_interleave;
+
+	/* The length is written first, then the value itself. */
+	if (put_user(len, optlen) || copy_to_user(optval, &interleave, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * 7.1.25. Set or Get the sctp partial delivery point
+ * (chapter and verse is quoted at sctp_setsockopt_partial_delivery_point())
+ *
+ * Copies the socket's pd_point value to optval as a u32 and writes
+ * sizeof(u32) to optlen.  Returns -EINVAL when len is smaller than
+ * sizeof(u32), -EFAULT when either user-space write fails, and 0 once
+ * both writes have succeeded.
+ */
+static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
+						  char __user *optval,
+						  int __user *optlen)
+{
+	u32 val;
+
+	if (len < sizeof(u32))
+		return -EINVAL;
+
+	/* Only a u32 is handed back, whatever size the caller offered. */
+	len = sizeof(u32);
+
+	val = sctp_sk(sk)->pd_point;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	/* The value and its length were delivered, so report success
+	 * rather than -ENOTSUPP.
+	 */
+	return 0;
+}
+
+/*
+ * 7.1.28. Set or Get the maximum burst (SCTP_MAX_BURST)
+ * (chapter and verse is quoted at sctp_setsockopt_maxburst())
+ *
+ * Copies the socket's max_burst value to optval as an int and writes
+ * sizeof(int) to optlen.  Returns -EINVAL when len is smaller than
+ * sizeof(int), -EFAULT when either user-space write fails, and 0 once
+ * both writes have succeeded.
+ */
+static int sctp_getsockopt_maxburst(struct sock *sk, int len,
+				    char __user *optval,
+				    int __user *optlen)
+{
+	int val;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	/* Only an int is handed back, whatever size the caller offered. */
+	len = sizeof(int);
+
+	val = sctp_sk(sk)->max_burst;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	/* The value and its length were delivered, so report success
+	 * rather than -ENOTSUPP.
+	 */
+	return 0;
+}
+
SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -4642,10 +4891,21 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_CONTEXT:
retval = sctp_getsockopt_context(sk, len, optval, optlen);
break;
+ case SCTP_FRAGMENT_INTERLEAVE:
+ retval = sctp_getsockopt_fragment_interleave(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ retval = sctp_getsockopt_partial_delivery_point(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_MAX_BURST:
+ retval = sctp_getsockopt_maxburst(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
- };
+ }
sctp_release_sock(sk);
return retval;
@@ -4760,7 +5020,8 @@ pp_found:
struct hlist_node *node;
SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n");
- if (pp->fastreuse && sk->sk_reuse)
+ if (pp->fastreuse && sk->sk_reuse &&
+ sk->sk_state != SCTP_SS_LISTENING)
goto success;
/* Run through the list of sockets bound to the port
@@ -4777,7 +5038,8 @@ pp_found:
struct sctp_endpoint *ep2;
ep2 = sctp_sk(sk2)->ep;
- if (reuse && sk2->sk_reuse)
+ if (reuse && sk2->sk_reuse &&
+ sk2->sk_state != SCTP_SS_LISTENING)
continue;
if (sctp_bind_addr_match(&ep2->base.bind_addr, addr,
@@ -4798,9 +5060,13 @@ pp_not_found:
* if sk->sk_reuse is too (that is, if the caller requested
* SO_REUSEADDR on this socket -sk-).
*/
- if (hlist_empty(&pp->owner))
- pp->fastreuse = sk->sk_reuse ? 1 : 0;
- else if (pp->fastreuse && !sk->sk_reuse)
+ if (hlist_empty(&pp->owner)) {
+ if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING)
+ pp->fastreuse = 1;
+ else
+ pp->fastreuse = 0;
+ } else if (pp->fastreuse &&
+ (!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING))
pp->fastreuse = 0;
/* We are set, so fill up all the data in the hash table
@@ -4808,8 +5074,8 @@ pp_not_found:
* sockets FIXME: Blurry, NPI (ipg).
*/
success:
- inet_sk(sk)->num = snum;
if (!sctp_sk(sk)->bind_hash) {
+ inet_sk(sk)->num = snum;
sk_add_bind_node(sk, &pp->owner);
sctp_sk(sk)->bind_hash = pp;
}
@@ -4882,12 +5148,16 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
* This is not currently spelled out in the SCTP sockets
* extensions draft, but follows the practice as seen in TCP
* sockets.
+ *
+ * Additionally, turn off fastreuse flag since we are now listening
*/
+ sk->sk_state = SCTP_SS_LISTENING;
if (!ep->base.bind_addr.port) {
if (sctp_autobind(sk))
return -EAGAIN;
- }
- sk->sk_state = SCTP_SS_LISTENING;
+ } else
+ sctp_sk(sk)->bind_hash->fastreuse = 0;
+
sctp_hash_endpoint(ep);
return 0;
}
@@ -4925,11 +5195,13 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog)
* extensions draft, but follows the practice as seen in TCP
* sockets.
*/
+ sk->sk_state = SCTP_SS_LISTENING;
if (!ep->base.bind_addr.port) {
if (sctp_autobind(sk))
return -EAGAIN;
- }
- sk->sk_state = SCTP_SS_LISTENING;
+ } else
+ sctp_sk(sk)->bind_hash->fastreuse = 0;
+
sk->sk_max_ack_backlog = backlog;
sctp_hash_endpoint(ep);
return 0;
@@ -4970,7 +5242,8 @@ int sctp_inet_listen(struct socket *sock, int backlog)
break;
default:
break;
- };
+ }
+
if (err)
goto cleanup;
@@ -5233,7 +5506,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
default:
return -EINVAL;
- };
+ }
}
return 0;
}
@@ -5638,6 +5911,36 @@ void sctp_wait_for_close(struct sock *sk, long timeout)
finish_wait(sk->sk_sleep, &wait);
}
+/* Run sctp_sock_rfree() on an skb and, recursively, on every skb hanging
+ * off its frag_list.  Fragments are handled before the skb itself.
+ */
+static void sctp_sock_rfree_frag(struct sk_buff *skb)
+{
+	/* The frag_list is only walked when data_len is non-zero. */
+	if (skb->data_len) {
+		struct sk_buff *cur = skb_shinfo(skb)->frag_list;
+
+		/* Don't forget the fragments. */
+		while (cur) {
+			sctp_sock_rfree_frag(cur);
+			cur = cur->next;
+		}
+	}
+
+	sctp_sock_rfree(skb);
+}
+
+/* Run sctp_skb_set_owner_r() with owner sk on an skb and, recursively, on
+ * every skb hanging off its frag_list.  Fragments are handled before the
+ * skb itself.
+ */
+static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk)
+{
+	/* The frag_list is only walked when data_len is non-zero. */
+	if (skb->data_len) {
+		struct sk_buff *cur = skb_shinfo(skb)->frag_list;
+
+		/* Don't forget the fragments. */
+		while (cur) {
+			sctp_skb_set_owner_r_frag(cur, sk);
+			cur = cur->next;
+		}
+	}
+
+	sctp_skb_set_owner_r(skb, sk);
+}
+
/* Populate the fields of the newsk from the oldsk and migrate the assoc
* and its messages to the newsk.
*/
@@ -5692,10 +5995,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) {
event = sctp_skb2event(skb);
if (event->asoc == assoc) {
- sctp_sock_rfree(skb);
+ sctp_sock_rfree_frag(skb);
__skb_unlink(skb, &oldsk->sk_receive_queue);
__skb_queue_tail(&newsk->sk_receive_queue, skb);
- sctp_skb_set_owner_r(skb, newsk);
+ sctp_skb_set_owner_r_frag(skb, newsk);
}
}
@@ -5706,9 +6009,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
*/
skb_queue_head_init(&newsp->pd_lobby);
- sctp_sk(newsk)->pd_mode = assoc->ulpq.pd_mode;
+ atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);
- if (sctp_sk(oldsk)->pd_mode) {
+ if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
struct sk_buff_head *queue;
/* Decide which queue to move pd_lobby skbs to. */
@@ -5723,10 +6026,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) {
event = sctp_skb2event(skb);
if (event->asoc == assoc) {
- sctp_sock_rfree(skb);
+ sctp_sock_rfree_frag(skb);
__skb_unlink(skb, &oldsp->pd_lobby);
__skb_queue_tail(queue, skb);
- sctp_skb_set_owner_r(skb, newsk);
+ sctp_skb_set_owner_r_frag(skb, newsk);
}
}
@@ -5734,8 +6037,18 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* delivery to finish.
*/
if (assoc->ulpq.pd_mode)
- sctp_clear_pd(oldsk);
+ sctp_clear_pd(oldsk, NULL);
+
+ }
+
+ sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp) {
+ sctp_sock_rfree_frag(skb);
+ sctp_skb_set_owner_r_frag(skb, newsk);
+ }
+ sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp) {
+ sctp_sock_rfree_frag(skb);
+ sctp_skb_set_owner_r_frag(skb, newsk);
}
/* Set the type of socket to indicate that it is peeled off from the
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index a596f5308cb1..961df275d5b9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -507,7 +507,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
transport->cwnd = max(transport->cwnd/2,
4*transport->asoc->pathmtu);
break;
- };
+ }
transport->partial_bytes_acked = 0;
SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
@@ -526,3 +526,35 @@ unsigned long sctp_transport_timeout(struct sctp_transport *t)
timeout += jiffies;
return timeout;
}
+
+/* Reset transport variables to their initial values.
+ *
+ * Re-derives cwnd, ssthresh and rto for transport t from its owning
+ * association (t->asoc), and zeroes the RTT estimates, the in-flight and
+ * error accounting, and the SFR-CACC state.
+ */
+void sctp_transport_reset(struct sctp_transport *t)
+{
+ struct sctp_association *asoc = t->asoc;
+
+ /* RFC 2960 (bis), Section 5.2.4
+ * All the congestion control parameters (e.g., cwnd, ssthresh)
+ * related to this peer MUST be reset to their initial values
+ * (see Section 6.2.1)
+ */
+ /* cwnd = min(4 * pathmtu, max(2 * pathmtu, 4380)) */
+ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380));
+ /* ssthresh is taken from the peer's a_rwnd recorded in the association */
+ t->ssthresh = asoc->peer.i.a_rwnd;
+ t->rto = asoc->rto_initial;
+ t->rtt = 0;
+ t->srtt = 0;
+ t->rttvar = 0;
+
+ /* Reset these additional variables so that we have a clean
+ * slate.
+ */
+ t->partial_bytes_acked = 0;
+ t->flight_size = 0;
+ t->error_count = 0;
+ t->rto_pending = 0;
+
+ /* Initialize the state information for SFR-CACC */
+ t->cacc.changeover_active = 0;
+ t->cacc.cycling_changeover = 0;
+ t->cacc.next_tsn_at_change = 0;
+ t->cacc.cacc_saw_newack = 0;
+}
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 2e11bc8d5d35..661ea2dd78ba 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -131,19 +131,54 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event)
struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
const struct sctp_association *asoc,
__u16 flags, __u16 state, __u16 error, __u16 outbound,
- __u16 inbound, gfp_t gfp)
+ __u16 inbound, struct sctp_chunk *chunk, gfp_t gfp)
{
struct sctp_ulpevent *event;
struct sctp_assoc_change *sac;
struct sk_buff *skb;
- event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
+ /* If the lower layer passed in the chunk, it will be
+ * an ABORT, so we need to include it in the sac_info.
+ */
+ if (chunk) {
+ /* sctp_inqu_pop() has already pulled off the chunk
+ * header. We need to put it back temporarily
+ */
+ skb_push(chunk->skb, sizeof(sctp_chunkhdr_t));
+
+ /* Copy the chunk data to a new skb and reserve enough
+ * head room to use as notification.
+ */
+ skb = skb_copy_expand(chunk->skb,
+ sizeof(struct sctp_assoc_change), 0, gfp);
+
+ if (!skb)
+ goto fail;
+
+ /* put back the chunk header now that we have a copy */
+ skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
+
+ /* Embed the event fields inside the cloned skb. */
+ event = sctp_skb2event(skb);
+ sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
+
+ /* Include the notification structure */
+ sac = (struct sctp_assoc_change *)
+ skb_push(skb, sizeof(struct sctp_assoc_change));
+
+ /* Trim the buffer to the right length. */
+ skb_trim(skb, sizeof(struct sctp_assoc_change) +
+ ntohs(chunk->chunk_hdr->length));
+ } else {
+ event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
MSG_NOTIFICATION, gfp);
- if (!event)
- goto fail;
- skb = sctp_event2skb(event);
- sac = (struct sctp_assoc_change *)
- skb_put(skb, sizeof(struct sctp_assoc_change));
+ if (!event)
+ goto fail;
+
+ skb = sctp_event2skb(event);
+ sac = (struct sctp_assoc_change *) skb_put(skb,
+ sizeof(struct sctp_assoc_change));
+ }
/* Socket Extensions for SCTP
* 5.3.1.1 SCTP_ASSOC_CHANGE
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index f4759a9bdaee..34eb977a204d 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -73,7 +73,7 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq,
/* Flush the reassembly and ordering queues. */
-static void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
+void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
{
struct sk_buff *skb;
struct sctp_ulpevent *event;
@@ -138,26 +138,59 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
/* Clear the partial delivery mode for this socket. Note: This
* assumes that no association is currently in partial delivery mode.
*/
-int sctp_clear_pd(struct sock *sk)
+int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc)
{
struct sctp_sock *sp = sctp_sk(sk);
- sp->pd_mode = 0;
- if (!skb_queue_empty(&sp->pd_lobby)) {
- struct list_head *list;
- sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
- list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
- INIT_LIST_HEAD(list);
- return 1;
+ if (atomic_dec_and_test(&sp->pd_mode)) {
+ /* This means there are no other associations in PD, so
+ * we can go ahead and clear out the lobby in one shot
+ */
+ if (!skb_queue_empty(&sp->pd_lobby)) {
+ struct list_head *list;
+ sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
+ list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
+ INIT_LIST_HEAD(list);
+ return 1;
+ }
+ } else {
+ /* There are other associations in PD, so we only need to
+ * pull stuff out of the lobby that belongs to the
+ * association that is exiting PD (all of its notifications
+ * are posted here).
+ */
+ if (!skb_queue_empty(&sp->pd_lobby) && asoc) {
+ struct sk_buff *skb, *tmp;
+ struct sctp_ulpevent *event;
+
+ sctp_skb_for_each(skb, &sp->pd_lobby, tmp) {
+ event = sctp_skb2event(skb);
+ if (event->asoc == asoc) {
+ __skb_unlink(skb, &sp->pd_lobby);
+ __skb_queue_tail(&sk->sk_receive_queue,
+ skb);
+ }
+ }
+ }
}
+
return 0;
}
+/* Set the pd_mode on the socket and ulpq: the socket-level pd_mode counter
+ * is atomically incremented and the ulpq's own pd_mode flag is set to 1.
+ */
+static void sctp_ulpq_set_pd(struct sctp_ulpq *ulpq)
+{
+	struct sock *sk = ulpq->asoc->base.sk;
+
+	atomic_inc(&sctp_sk(sk)->pd_mode);
+	ulpq->pd_mode = 1;
+}
+
/* Clear the pd_mode and restart any pending messages waiting for delivery. */
static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
{
ulpq->pd_mode = 0;
- return sctp_clear_pd(ulpq->asoc->base.sk);
+ return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
}
/* If the SKB of 'event' is on a list, it is the first such member
@@ -187,18 +220,35 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
* the association the cause of the partial delivery.
*/
- if (!sctp_sk(sk)->pd_mode) {
+ if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) {
queue = &sk->sk_receive_queue;
- } else if (ulpq->pd_mode) {
- if (event->msg_flags & MSG_NOTIFICATION)
- queue = &sctp_sk(sk)->pd_lobby;
- else {
- clear_pd = event->msg_flags & MSG_EOR;
- queue = &sk->sk_receive_queue;
+ } else {
+ if (ulpq->pd_mode) {
+ /* If the association is in partial delivery, we
+ * need to finish delivering the partially processed
+ * packet before passing any other data. This is
+ * because we don't truly support stream interleaving.
+ */
+ if ((event->msg_flags & MSG_NOTIFICATION) ||
+ (SCTP_DATA_NOT_FRAG ==
+ (event->msg_flags & SCTP_DATA_FRAG_MASK)))
+ queue = &sctp_sk(sk)->pd_lobby;
+ else {
+ clear_pd = event->msg_flags & MSG_EOR;
+ queue = &sk->sk_receive_queue;
+ }
+ } else {
+ /*
+ * If fragment interleave is enabled, we
+ * can queue this to the receive queue instead
+ * of the lobby.
+ */
+ if (sctp_sk(sk)->frag_interleave)
+ queue = &sk->sk_receive_queue;
+ else
+ queue = &sctp_sk(sk)->pd_lobby;
}
- } else
- queue = &sctp_sk(sk)->pd_lobby;
-
+ }
/* If we are harvesting multiple skbs they will be
* collected on a list.
@@ -341,7 +391,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
break;
pos->next = pnext;
pos = pnext;
- };
+ }
event = sctp_skb2event(f_frag);
SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
@@ -360,6 +410,11 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
struct sk_buff *first_frag = NULL;
__u32 ctsn, next_tsn;
struct sctp_ulpevent *retval = NULL;
+ struct sk_buff *pd_first = NULL;
+ struct sk_buff *pd_last = NULL;
+ size_t pd_len = 0;
+ struct sctp_association *asoc;
+ u32 pd_point;
/* Initialized to 0 just to avoid compiler warning message. Will
* never be used with this value. It is referenced only after it
@@ -375,6 +430,10 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
* we expect to find the remaining middle fragments and the last
* fragment in order. If not, first_frag is reset to NULL and we
* start the next pass when we find another first fragment.
+ *
+ * There is a potential to do partial delivery if user sets
+ * SCTP_PARTIAL_DELIVERY_POINT option. Let's count some things here
+ * to see if we can do PD.
*/
skb_queue_walk(&ulpq->reasm, pos) {
cevent = sctp_skb2event(pos);
@@ -382,14 +441,32 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
case SCTP_DATA_FIRST_FRAG:
+ /* If this "FIRST_FRAG" is the first
+ * element in the queue, then count it towards
+ * possible PD.
+ */
+ if (pos == ulpq->reasm.next) {
+ pd_first = pos;
+ pd_last = pos;
+ pd_len = pos->len;
+ } else {
+ pd_first = NULL;
+ pd_last = NULL;
+ pd_len = 0;
+ }
+
first_frag = pos;
next_tsn = ctsn + 1;
break;
case SCTP_DATA_MIDDLE_FRAG:
- if ((first_frag) && (ctsn == next_tsn))
+ if ((first_frag) && (ctsn == next_tsn)) {
next_tsn++;
- else
+ if (pd_first) {
+ pd_last = pos;
+ pd_len += pos->len;
+ }
+ } else
first_frag = NULL;
break;
@@ -399,8 +476,29 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
else
first_frag = NULL;
break;
- };
+ }
+ }
+ asoc = ulpq->asoc;
+ if (pd_first) {
+ /* Make sure we can enter partial delivery.
+ * We can trigger partial delivery only if fragment
+ * interleave is set, or the socket is not already
+ * in partial delivery.
+ */
+ if (!sctp_sk(asoc->base.sk)->frag_interleave &&
+ atomic_read(&sctp_sk(asoc->base.sk)->pd_mode))
+ goto done;
+
+ cevent = sctp_skb2event(pd_first);
+ pd_point = sctp_sk(asoc->base.sk)->pd_point;
+ if (pd_point && pd_point <= pd_len) {
+ retval = sctp_make_reassembled_event(&ulpq->reasm,
+ pd_first,
+ pd_last);
+ if (retval)
+ sctp_ulpq_set_pd(ulpq);
+ }
}
done:
return retval;
@@ -458,7 +556,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq
goto done;
default:
return NULL;
- };
+ }
}
/* We have the reassembled event. There is no need to look
@@ -550,7 +648,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u
break;
default:
return NULL;
- };
+ }
}
/* We have the reassembled event. There is no need to look
@@ -819,19 +917,29 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
{
struct sctp_ulpevent *event;
struct sctp_association *asoc;
+ struct sctp_sock *sp;
asoc = ulpq->asoc;
+ sp = sctp_sk(asoc->base.sk);
- /* Are we already in partial delivery mode? */
- if (!sctp_sk(asoc->base.sk)->pd_mode) {
+ /* If the association is already in Partial Delivery mode
+ * we have nothing to do.
+ */
+ if (ulpq->pd_mode)
+ return;
+ /* If the user enabled fragment interleave socket option,
+ * multiple associations can enter partial delivery.
+ * Otherwise, we can only enter partial delivery if the
+ * socket is not in partial delivery mode.
+ */
+ if (sp->frag_interleave || atomic_read(&sp->pd_mode) == 0) {
/* Is partial delivery possible? */
event = sctp_ulpq_retrieve_first(ulpq);
/* Send event to the ULP. */
if (event) {
sctp_ulpq_tail_event(ulpq, event);
- sctp_sk(asoc->base.sk)->pd_mode = 1;
- ulpq->pd_mode = 1;
+ sctp_ulpq_set_pd(ulpq);
return;
}
}
diff --git a/net/socket.c b/net/socket.c
index 9566e57ac7f5..98a8f67abbfc 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -261,8 +261,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
{
struct socket_alloc *ei = (struct socket_alloc *)foo;
- if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
- == SLAB_CTOR_CONSTRUCTOR)
+ if (flags & SLAB_CTOR_CONSTRUCTOR)
inode_init_once(&ei->vfs_inode);
}
@@ -314,8 +313,19 @@ static int sockfs_delete_dentry(struct dentry *dentry)
dentry->d_flags |= DCACHE_UNHASHED;
return 0;
}
+
+/*
+ * sockfs_dname() is called from d_path().
+ *
+ * Formats the dentry's name as "socket:[<inode number>]" via
+ * dynamic_dname(), using the caller-supplied buffer and length.
+ */
+static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+	unsigned long ino = dentry->d_inode->i_ino;
+
+	return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", ino);
+}
+
static struct dentry_operations sockfs_dentry_operations = {
.d_delete = sockfs_delete_dentry,
+ .d_dname = sockfs_dname,
};
/*
@@ -355,14 +365,9 @@ static int sock_alloc_fd(struct file **filep)
static int sock_attach_fd(struct socket *sock, struct file *file)
{
- struct qstr this;
- char name[32];
-
- this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
- this.name = name;
- this.hash = 0;
+ struct qstr name = { .name = "" };
- file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
+ file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
if (unlikely(!file->f_path.dentry))
return -ENOMEM;
@@ -585,6 +590,37 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
return result;
}
+/*
+ * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
+ *
+ * Attaches the skb's timestamp to msg as a SOL_SOCKET control message:
+ * SCM_TIMESTAMPNS (struct timespec) when SOCK_RCVTSTAMPNS is set on sk,
+ * SCM_TIMESTAMP (struct timeval) otherwise.  The timestamp that was used
+ * is stored back into skb->tstamp.
+ */
+void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb)
+{
+	ktime_t stamp = skb->tstamp;
+
+	/* Race occurred between timestamp enabling and packet
+	   receiving.  Fill in the current time for now. */
+	if (stamp.tv64 == 0)
+		stamp = ktime_get_real();
+	skb->tstamp = stamp;
+
+	if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+		struct timespec ts = ktime_to_timespec(stamp);
+
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
+	} else {
+		struct timeval tv = ktime_to_timeval(stamp);
+
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
+	}
+}
+
+EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
+
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
@@ -1292,7 +1328,7 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if(sock) {
+ if (sock) {
err = move_addr_to_kernel(umyaddr, addrlen, address);
if (err >= 0) {
err = security_socket_bind(sock,
@@ -1381,7 +1417,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
err = sock_attach_fd(newsock, newfile);
if (err < 0)
- goto out_fd;
+ goto out_fd_simple;
err = security_socket_accept(sock, newsock);
if (err)
@@ -1414,6 +1450,11 @@ out_put:
fput_light(sock->file, fput_needed);
out:
return err;
+out_fd_simple:
+ sock_release(newsock);
+ put_filp(newfile);
+ put_unused_fd(newfd);
+ goto out_put;
out_fd:
fput(newfile);
put_unused_fd(newfd);
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index cdcab9ca4c60..8ebfc4db7f51 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
- pmap_clnt.o timer.o xdr.o \
+ rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index 104cbf4f769f..d158635de6c0 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -123,9 +123,6 @@ spkm3_make_token(struct spkm3_ctx *ctx,
return GSS_S_COMPLETE;
out_err:
- if (md5cksum.data)
- kfree(md5cksum.data);
-
token->data = NULL;
token->len = 0;
return GSS_S_FAILURE;
@@ -152,7 +149,7 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
switch (cksumtype) {
case CKSUMTYPE_HMAC_MD5:
- cksumname = "md5";
+ cksumname = "hmac(md5)";
break;
default:
dprintk("RPC: spkm3_make_checksum:"
@@ -172,8 +169,12 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
if (err)
goto out;
+ err = crypto_hash_init(&desc);
+ if (err)
+ goto out;
+
sg_set_buf(sg, header, hdrlen);
- crypto_hash_update(&desc, sg, 1);
+ crypto_hash_update(&desc, sg, sg->length);
xdr_process_buf(body, body_offset, body->len - body_offset,
spkm3_checksummer, &desc);
@@ -184,5 +185,3 @@ out:
return err ? GSS_S_FAILURE : 0;
}
-
-EXPORT_SYMBOL(make_spkm3_checksum);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f02f24ae9468..543b085ae2c1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1237,20 +1237,12 @@ static int content_open(struct inode *inode, struct file *file)
return res;
}
-static int content_release(struct inode *inode, struct file *file)
-{
- struct seq_file *m = (struct seq_file *)file->private_data;
- struct handle *han = m->private;
- kfree(han);
- m->private = NULL;
- return seq_release(inode, file);
-}
static const struct file_operations content_file_operations = {
.open = content_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = content_release,
+ .release = seq_release_private,
};
static ssize_t read_flush(struct file *file, char __user *buf,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6d7221fe990a..d8fbee40a19c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -36,8 +36,6 @@
#include <linux/sunrpc/metrics.h>
-#define RPC_SLACK_SPACE (1024) /* total overkill */
-
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_CALL
#endif
@@ -747,21 +745,38 @@ call_reserveresult(struct rpc_task *task)
static void
call_allocate(struct rpc_task *task)
{
+ unsigned int slack = task->tk_auth->au_cslack;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = task->tk_xprt;
- unsigned int bufsiz;
+ struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
dprint_status(task);
+ task->tk_status = 0;
task->tk_action = call_bind;
+
if (req->rq_buffer)
return;
- /* FIXME: compute buffer requirements more exactly using
- * auth->au_wslack */
- bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE;
+ if (proc->p_proc != 0) {
+ BUG_ON(proc->p_arglen == 0);
+ if (proc->p_decode != NULL)
+ BUG_ON(proc->p_replen == 0);
+ }
- if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL)
+ /*
+ * Calculate the size (in quads) of the RPC call
+ * and reply headers, and convert both values
+ * to byte sizes.
+ */
+ req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen;
+ req->rq_callsize <<= 2;
+ req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
+ req->rq_rcvsize <<= 2;
+
+ req->rq_buffer = xprt->ops->buf_alloc(task,
+ req->rq_callsize + req->rq_rcvsize);
+ if (req->rq_buffer != NULL)
return;
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
@@ -788,6 +803,17 @@ rpc_task_force_reencode(struct rpc_task *task)
task->tk_rqstp->rq_snd_buf.len = 0;
}
+/* Set up an xdr_buf whose head segment is the len bytes at start, with a
+ * zero-length tail, no page data, and no bytes used yet.
+ */
+static inline void
+rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
+{
+	/* Head covers the whole region; it is also the buffer's capacity. */
+	buf->head->iov_base = start;
+	buf->head->iov_len = len;
+	buf->buflen = len;
+
+	/* Nothing lives in the tail or pages, and nothing is encoded yet. */
+	buf->tail->iov_len = 0;
+	buf->page_len = 0;
+	buf->len = 0;
+}
+
/*
* 3. Encode arguments of an RPC call
*/
@@ -795,28 +821,17 @@ static void
call_encode(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- struct xdr_buf *sndbuf = &req->rq_snd_buf;
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- unsigned int bufsiz;
kxdrproc_t encode;
__be32 *p;
dprint_status(task);
- /* Default buffer setup */
- bufsiz = req->rq_bufsize >> 1;
- sndbuf->head[0].iov_base = (void *)req->rq_buffer;
- sndbuf->head[0].iov_len = bufsiz;
- sndbuf->tail[0].iov_len = 0;
- sndbuf->page_len = 0;
- sndbuf->len = 0;
- sndbuf->buflen = bufsiz;
- rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz);
- rcvbuf->head[0].iov_len = bufsiz;
- rcvbuf->tail[0].iov_len = 0;
- rcvbuf->page_len = 0;
- rcvbuf->len = 0;
- rcvbuf->buflen = bufsiz;
+ rpc_xdr_buf_init(&req->rq_snd_buf,
+ req->rq_buffer,
+ req->rq_callsize);
+ rpc_xdr_buf_init(&req->rq_rcv_buf,
+ (char *)req->rq_buffer + req->rq_callsize,
+ req->rq_rcvsize);
/* Encode header and provided arguments */
encode = task->tk_msg.rpc_proc->p_encode;
@@ -887,9 +902,11 @@ call_bind_status(struct rpc_task *task)
task->tk_pid);
break;
case -EPROTONOSUPPORT:
- dprintk("RPC: %5u remote rpcbind version 2 unavailable\n",
+ dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n",
task->tk_pid);
- break;
+ task->tk_status = 0;
+ task->tk_action = call_bind;
+ return;
default:
dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
task->tk_pid, -task->tk_status);
@@ -1046,6 +1063,8 @@ call_status(struct rpc_task *task)
rpc_delay(task, 3*HZ);
case -ETIMEDOUT:
task->tk_action = call_timeout;
+ if (task->tk_client->cl_discrtry)
+ xprt_disconnect(task->tk_xprt);
break;
case -ECONNREFUSED:
case -ENOTCONN:
@@ -1169,6 +1188,8 @@ call_decode(struct rpc_task *task)
out_retry:
req->rq_received = req->rq_private_buf.len = 0;
task->tk_status = 0;
+ if (task->tk_client->cl_discrtry)
+ xprt_disconnect(task->tk_xprt);
}
/*
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
deleted file mode 100644
index d9f765344589..000000000000
--- a/net/sunrpc/pmap_clnt.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * linux/net/sunrpc/pmap_clnt.c
- *
- * In-kernel RPC portmapper client.
- *
- * Portmapper supports version 2 of the rpcbind protocol (RFC 1833).
- *
- * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
- */
-
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/uio.h>
-#include <linux/in.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/sched.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_PMAP
-#endif
-
-#define PMAP_SET 1
-#define PMAP_UNSET 2
-#define PMAP_GETPORT 3
-
-struct portmap_args {
- u32 pm_prog;
- u32 pm_vers;
- u32 pm_prot;
- unsigned short pm_port;
- struct rpc_xprt * pm_xprt;
-};
-
-static struct rpc_procinfo pmap_procedures[];
-static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int);
-static void pmap_getport_done(struct rpc_task *, void *);
-static struct rpc_program pmap_program;
-
-static void pmap_getport_prepare(struct rpc_task *task, void *calldata)
-{
- struct portmap_args *map = calldata;
- struct rpc_message msg = {
- .rpc_proc = &pmap_procedures[PMAP_GETPORT],
- .rpc_argp = map,
- .rpc_resp = &map->pm_port,
- };
-
- rpc_call_setup(task, &msg, 0);
-}
-
-static inline struct portmap_args *pmap_map_alloc(void)
-{
- return kmalloc(sizeof(struct portmap_args), GFP_NOFS);
-}
-
-static inline void pmap_map_free(struct portmap_args *map)
-{
- kfree(map);
-}
-
-static void pmap_map_release(void *data)
-{
- struct portmap_args *map = data;
-
- xprt_put(map->pm_xprt);
- pmap_map_free(map);
-}
-
-static const struct rpc_call_ops pmap_getport_ops = {
- .rpc_call_prepare = pmap_getport_prepare,
- .rpc_call_done = pmap_getport_done,
- .rpc_release = pmap_map_release,
-};
-
-static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status)
-{
- xprt_clear_binding(xprt);
- rpc_wake_up_status(&xprt->binding, status);
-}
-
-/**
- * rpc_getport - obtain the port for a given RPC service on a given host
- * @task: task that is waiting for portmapper request
- *
- * This one can be called for an ongoing RPC request, and can be used in
- * an async (rpciod) context.
- */
-void rpc_getport(struct rpc_task *task)
-{
- struct rpc_clnt *clnt = task->tk_client;
- struct rpc_xprt *xprt = task->tk_xprt;
- struct sockaddr_in addr;
- struct portmap_args *map;
- struct rpc_clnt *pmap_clnt;
- struct rpc_task *child;
- int status;
-
- dprintk("RPC: %5u rpc_getport(%s, %u, %u, %d)\n",
- task->tk_pid, clnt->cl_server,
- clnt->cl_prog, clnt->cl_vers, xprt->prot);
-
- /* Autobind on cloned rpc clients is discouraged */
- BUG_ON(clnt->cl_parent != clnt);
-
- status = -EACCES; /* tell caller to check again */
- if (xprt_test_and_set_binding(xprt))
- goto bailout_nowake;
-
- /* Put self on queue before sending rpcbind request, in case
- * pmap_getport_done completes before we return from rpc_run_task */
- rpc_sleep_on(&xprt->binding, task, NULL, NULL);
-
- /* Someone else may have bound if we slept */
- status = 0;
- if (xprt_bound(xprt))
- goto bailout_nofree;
-
- status = -ENOMEM;
- map = pmap_map_alloc();
- if (!map)
- goto bailout_nofree;
- map->pm_prog = clnt->cl_prog;
- map->pm_vers = clnt->cl_vers;
- map->pm_prot = xprt->prot;
- map->pm_port = 0;
- map->pm_xprt = xprt_get(xprt);
-
- rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr));
- pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0);
- status = PTR_ERR(pmap_clnt);
- if (IS_ERR(pmap_clnt))
- goto bailout;
-
- status = -EIO;
- child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
- if (IS_ERR(child))
- goto bailout_nofree;
- rpc_put_task(child);
-
- task->tk_xprt->stat.bind_count++;
- return;
-
-bailout:
- pmap_map_free(map);
- xprt_put(xprt);
-bailout_nofree:
- pmap_wake_portmap_waiters(xprt, status);
-bailout_nowake:
- task->tk_status = status;
-}
-
-#ifdef CONFIG_ROOT_NFS
-/**
- * rpc_getport_external - obtain the port for a given RPC service on a given host
- * @sin: address of remote peer
- * @prog: RPC program number to bind
- * @vers: RPC version number to bind
- * @prot: transport protocol to use to make this request
- *
- * This one is called from outside the RPC client in a synchronous task context.
- */
-int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
-{
- struct portmap_args map = {
- .pm_prog = prog,
- .pm_vers = vers,
- .pm_prot = prot,
- .pm_port = 0
- };
- struct rpc_message msg = {
- .rpc_proc = &pmap_procedures[PMAP_GETPORT],
- .rpc_argp = &map,
- .rpc_resp = &map.pm_port,
- };
- struct rpc_clnt *pmap_clnt;
- char hostname[32];
- int status;
-
- dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
- NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
-
- sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
- pmap_clnt = pmap_create(hostname, sin, prot, 0);
- if (IS_ERR(pmap_clnt))
- return PTR_ERR(pmap_clnt);
-
- /* Setup the call info struct */
- status = rpc_call_sync(pmap_clnt, &msg, 0);
-
- if (status >= 0) {
- if (map.pm_port != 0)
- return map.pm_port;
- status = -EACCES;
- }
- return status;
-}
-#endif
-
-/*
- * Portmapper child task invokes this callback via tk_exit.
- */
-static void pmap_getport_done(struct rpc_task *child, void *data)
-{
- struct portmap_args *map = data;
- struct rpc_xprt *xprt = map->pm_xprt;
- int status = child->tk_status;
-
- if (status < 0) {
- /* Portmapper not available */
- xprt->ops->set_port(xprt, 0);
- } else if (map->pm_port == 0) {
- /* Requested RPC service wasn't registered */
- xprt->ops->set_port(xprt, 0);
- status = -EACCES;
- } else {
- /* Succeeded */
- xprt->ops->set_port(xprt, map->pm_port);
- xprt_set_bound(xprt);
- status = 0;
- }
-
- dprintk("RPC: %5u pmap_getport_done(status %d, port %u)\n",
- child->tk_pid, status, map->pm_port);
-
- pmap_wake_portmap_waiters(xprt, status);
-}
-
-/**
- * rpc_register - set or unset a port registration with the local portmapper
- * @prog: RPC program number to bind
- * @vers: RPC version number to bind
- * @prot: transport protocol to use to make this request
- * @port: port value to register
- * @okay: result code
- *
- * port == 0 means unregister, port != 0 means register.
- */
-int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
-{
- struct sockaddr_in sin = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
- };
- struct portmap_args map = {
- .pm_prog = prog,
- .pm_vers = vers,
- .pm_prot = prot,
- .pm_port = port,
- };
- struct rpc_message msg = {
- .rpc_proc = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET],
- .rpc_argp = &map,
- .rpc_resp = okay,
- };
- struct rpc_clnt *pmap_clnt;
- int error = 0;
-
- dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n",
- prog, vers, prot, port);
-
- pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
- if (IS_ERR(pmap_clnt)) {
- error = PTR_ERR(pmap_clnt);
- dprintk("RPC: couldn't create pmap client. Error = %d\n",
- error);
- return error;
- }
-
- error = rpc_call_sync(pmap_clnt, &msg, 0);
-
- if (error < 0) {
- printk(KERN_WARNING
- "RPC: failed to contact portmap (errno %d).\n",
- error);
- }
- dprintk("RPC: registration status %d/%d\n", error, *okay);
-
- /* Client deleted automatically because cl_oneshot == 1 */
- return error;
-}
-
-static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
-{
- struct rpc_create_args args = {
- .protocol = proto,
- .address = (struct sockaddr *)srvaddr,
- .addrsize = sizeof(*srvaddr),
- .servername = hostname,
- .program = &pmap_program,
- .version = RPC_PMAP_VERSION,
- .authflavor = RPC_AUTH_UNIX,
- .flags = (RPC_CLNT_CREATE_ONESHOT |
- RPC_CLNT_CREATE_NOPING),
- };
-
- srvaddr->sin_port = htons(RPC_PMAP_PORT);
- if (!privileged)
- args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
- return rpc_create(&args);
-}
-
-/*
- * XDR encode/decode functions for PMAP
- */
-static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map)
-{
- dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n",
- map->pm_prog, map->pm_vers,
- map->pm_prot, map->pm_port);
- *p++ = htonl(map->pm_prog);
- *p++ = htonl(map->pm_vers);
- *p++ = htonl(map->pm_prot);
- *p++ = htonl(map->pm_port);
-
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp)
-{
- *portp = (unsigned short) ntohl(*p++);
- return 0;
-}
-
-static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp)
-{
- *boolp = (unsigned int) ntohl(*p++);
- return 0;
-}
-
-static struct rpc_procinfo pmap_procedures[] = {
-[PMAP_SET] = {
- .p_proc = PMAP_SET,
- .p_encode = (kxdrproc_t) xdr_encode_mapping,
- .p_decode = (kxdrproc_t) xdr_decode_bool,
- .p_bufsiz = 4,
- .p_count = 1,
- .p_statidx = PMAP_SET,
- .p_name = "SET",
- },
-[PMAP_UNSET] = {
- .p_proc = PMAP_UNSET,
- .p_encode = (kxdrproc_t) xdr_encode_mapping,
- .p_decode = (kxdrproc_t) xdr_decode_bool,
- .p_bufsiz = 4,
- .p_count = 1,
- .p_statidx = PMAP_UNSET,
- .p_name = "UNSET",
- },
-[PMAP_GETPORT] = {
- .p_proc = PMAP_GETPORT,
- .p_encode = (kxdrproc_t) xdr_encode_mapping,
- .p_decode = (kxdrproc_t) xdr_decode_port,
- .p_bufsiz = 4,
- .p_count = 1,
- .p_statidx = PMAP_GETPORT,
- .p_name = "GETPORT",
- },
-};
-
-static struct rpc_version pmap_version2 = {
- .number = 2,
- .nrprocs = 4,
- .procs = pmap_procedures
-};
-
-static struct rpc_version * pmap_version[] = {
- NULL,
- NULL,
- &pmap_version2
-};
-
-static struct rpc_stat pmap_stats;
-
-static struct rpc_program pmap_program = {
- .name = "portmap",
- .number = RPC_PMAP_PROGRAM,
- .nrvers = ARRAY_SIZE(pmap_version),
- .version = pmap_version,
- .stats = &pmap_stats,
-};
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9b9ea5045569..ad39b47e05bc 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -828,8 +828,7 @@ init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
{
struct rpc_inode *rpci = (struct rpc_inode *) foo;
- if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
- SLAB_CTOR_CONSTRUCTOR) {
+ if (flags & SLAB_CTOR_CONSTRUCTOR) {
inode_init_once(&rpci->vfs_inode);
rpci->private = NULL;
rpci->nreaders = 0;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
new file mode 100644
index 000000000000..6c7aa8a1f0c6
--- /dev/null
+++ b/net/sunrpc/rpcb_clnt.c
@@ -0,0 +1,625 @@
+/*
+ * In-kernel rpcbind client supporting versions 2, 3, and 4 of the rpcbind
+ * protocol
+ *
+ * Based on RFC 1833: "Binding Protocols for ONC RPC Version 2" and
+ * RFC 3530: "Network File System (NFS) version 4 Protocol"
+ *
+ * Original: Gilles Quillard, Bull Open Source, 2005 <gilles.quillard@bull.net>
+ * Updated: Chuck Lever, Oracle Corporation, 2007 <chuck.lever@oracle.com>
+ *
+ * Descended from net/sunrpc/pmap_clnt.c,
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_BIND
+#endif
+
+#define RPCBIND_PROGRAM (100000u)
+#define RPCBIND_PORT (111u)
+
+enum {
+ RPCBPROC_NULL,
+ RPCBPROC_SET,
+ RPCBPROC_UNSET,
+ RPCBPROC_GETPORT,
+ RPCBPROC_GETADDR = 3, /* alias for GETPORT */
+ RPCBPROC_DUMP,
+ RPCBPROC_CALLIT,
+ RPCBPROC_BCAST = 5, /* alias for CALLIT */
+ RPCBPROC_GETTIME,
+ RPCBPROC_UADDR2TADDR,
+ RPCBPROC_TADDR2UADDR,
+ RPCBPROC_GETVERSADDR,
+ RPCBPROC_INDIRECT,
+ RPCBPROC_GETADDRLIST,
+ RPCBPROC_GETSTAT,
+};
+
+#define RPCB_HIGHPROC_2 RPCBPROC_CALLIT
+#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR
+#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT
+
+/*
+ * r_addr
+ *
+ * Quoting RFC 3530, section 2.2:
+ *
+ * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the
+ * US-ASCII string:
+ *
+ * h1.h2.h3.h4.p1.p2
+ *
+ * The prefix, "h1.h2.h3.h4", is the standard textual form for
+ * representing an IPv4 address, which is always four octets long.
+ * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively,
+ * the first through fourth octets each converted to ASCII-decimal.
+ * Assuming big-endian ordering, p1 and p2 are, respectively, the first
+ * and second octets each converted to ASCII-decimal. For example, if a
+ * host, in big-endian order, has an address of 0x0A010307 and there is
+ * a service listening on, in big endian order, port 0x020F (decimal
+ * 527), then the complete universal address is "10.1.3.7.2.15".
+ *
+ * ...
+ *
+ * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the
+ * US-ASCII string:
+ *
+ * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2
+ *
+ * The suffix "p1.p2" is the service port, and is computed the same way
+ * as with universal addresses for TCP and UDP over IPv4. The prefix,
+ * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for
+ * representing an IPv6 address as defined in Section 2.2 of [RFC2373].
+ * Additionally, the two alternative forms specified in Section 2.2 of
+ * [RFC2373] are also acceptable.
+ *
+ * XXX: Currently this implementation does not explicitly convert the
+ * stored address to US-ASCII on non-ASCII systems.
+ */
+#define RPCB_MAXADDRLEN (128u)
+
+/*
+ * r_netid
+ *
+ * Quoting RFC 3530, section 2.2:
+ *
+ * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP
+ * over IPv4 the value of r_netid is the string "udp".
+ *
+ * ...
+ *
+ * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP
+ * over IPv6 the value of r_netid is the string "udp6".
+ */
+#define RPCB_NETID_UDP "\165\144\160" /* "udp" */
+#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */
+#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */
+#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */
+
+#define RPCB_MAXNETIDLEN (4u)
+
+/*
+ * r_owner
+ *
+ * The "owner" is allowed to unset a service in the rpcbind database.
+ * We always use the following (arbitrary) fixed string.
+ */
+#define RPCB_OWNER_STRING "rpcb"
+#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
+
+static void rpcb_getport_done(struct rpc_task *, void *);
+extern struct rpc_program rpcb_program;
+
+struct rpcbind_args {
+ struct rpc_xprt * r_xprt;
+
+ u32 r_prog;
+ u32 r_vers;
+ u32 r_prot;
+ unsigned short r_port;
+ char * r_netid;
+ char r_addr[RPCB_MAXADDRLEN];
+ char * r_owner;
+};
+
+static struct rpc_procinfo rpcb_procedures2[];
+static struct rpc_procinfo rpcb_procedures3[];
+
+static struct rpcb_info {
+ int rpc_vers;
+ struct rpc_procinfo * rpc_proc;
+} rpcb_next_version[];
+
+static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
+{
+ struct rpcbind_args *map = calldata;
+ struct rpc_xprt *xprt = map->r_xprt;
+ struct rpc_message msg = {
+ .rpc_proc = rpcb_next_version[xprt->bind_index].rpc_proc,
+ .rpc_argp = map,
+ .rpc_resp = &map->r_port,
+ };
+
+ rpc_call_setup(task, &msg, 0);
+}
+
+static void rpcb_map_release(void *data)
+{
+ struct rpcbind_args *map = data;
+
+ xprt_put(map->r_xprt);
+ kfree(map);
+}
+
+static const struct rpc_call_ops rpcb_getport_ops = {
+ .rpc_call_prepare = rpcb_getport_prepare,
+ .rpc_call_done = rpcb_getport_done,
+ .rpc_release = rpcb_map_release,
+};
+
+static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status)
+{
+ xprt_clear_binding(xprt);
+ rpc_wake_up_status(&xprt->binding, status);
+}
+
+static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
+ int proto, int version, int privileged)
+{
+ struct rpc_create_args args = {
+ .protocol = proto,
+ .address = srvaddr,
+ .addrsize = sizeof(struct sockaddr_in),
+ .servername = hostname,
+ .program = &rpcb_program,
+ .version = version,
+ .authflavor = RPC_AUTH_UNIX,
+ .flags = (RPC_CLNT_CREATE_ONESHOT |
+ RPC_CLNT_CREATE_NOPING),
+ };
+
+ ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+ if (!privileged)
+ args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+ return rpc_create(&args);
+}
+
+/**
+ * rpcb_register - set or unset a port registration with the local rpcbind svc
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ * @port: port value to register
+ * @okay: result code
+ *
+ * port == 0 means unregister, port != 0 means register.
+ *
+ * This routine supports only rpcbind version 2.
+ */
+int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+{
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ struct rpcbind_args map = {
+ .r_prog = prog,
+ .r_vers = vers,
+ .r_prot = prot,
+ .r_port = port,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &rpcb_procedures2[port ?
+ RPCBPROC_SET : RPCBPROC_UNSET],
+ .rpc_argp = &map,
+ .rpc_resp = okay,
+ };
+ struct rpc_clnt *rpcb_clnt;
+ int error = 0;
+
+ dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
+ "rpcbind\n", (port ? "" : "un"),
+ prog, vers, prot, port);
+
+ rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
+ IPPROTO_UDP, 2, 1);
+ if (IS_ERR(rpcb_clnt))
+ return PTR_ERR(rpcb_clnt);
+
+ error = rpc_call_sync(rpcb_clnt, &msg, 0);
+
+ if (error < 0)
+ printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+ "server (errno %d).\n", -error);
+ dprintk("RPC: registration status %d/%d\n", error, *okay);
+
+ return error;
+}
+
+#ifdef CONFIG_ROOT_NFS
+/**
+ * rpcb_getport_external - obtain the port for an RPC service on a given host
+ * @sin: address of remote peer
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ *
+ * Called from outside the RPC client in a synchronous task context.
+ *
+ * For now, this supports only version 2 queries, but is used only by
+ * mount_clnt for NFS_ROOT.
+ */
+int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog,
+ __u32 vers, int prot)
+{
+ struct rpcbind_args map = {
+ .r_prog = prog,
+ .r_vers = vers,
+ .r_prot = prot,
+ .r_port = 0,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT],
+ .rpc_argp = &map,
+ .rpc_resp = &map.r_port,
+ };
+ struct rpc_clnt *rpcb_clnt;
+ char hostname[40];
+ int status;
+
+ dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
+ NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
+
+ sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
+ rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0);
+ if (IS_ERR(rpcb_clnt))
+ return PTR_ERR(rpcb_clnt);
+
+ status = rpc_call_sync(rpcb_clnt, &msg, 0);
+
+ if (status >= 0) {
+ if (map.r_port != 0)
+ return map.r_port;
+ status = -EACCES;
+ }
+ return status;
+}
+#endif
+
+/**
+ * rpcb_getport - obtain the port for a given RPC service on a given host
+ * @task: task that is waiting for portmapper request
+ *
+ * This one can be called for an ongoing RPC request, and can be used in
+ * an async (rpciod) context.
+ */
+void rpcb_getport(struct rpc_task *task)
+{
+ struct rpc_clnt *clnt = task->tk_client;
+ int bind_version;
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_clnt *rpcb_clnt;
+ /* Must not be static: every call allocates and owns its own args */
+ struct rpcbind_args *map;
+ struct rpc_task *child;
+ struct sockaddr addr;
+ int status;
+
+ dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n",
+ task->tk_pid, clnt->cl_server,
+ clnt->cl_prog, clnt->cl_vers, xprt->prot);
+
+ /* Autobind on cloned rpc clients is discouraged */
+ BUG_ON(clnt->cl_parent != clnt);
+
+ if (xprt_test_and_set_binding(xprt)) {
+ status = -EACCES; /* tell caller to check again */
+ dprintk("RPC: %5u rpcb_getport waiting for another binder\n",
+ task->tk_pid);
+ goto bailout_nowake;
+ }
+
+ /* Put self on queue before sending rpcbind request, in case
+ * rpcb_getport_done completes before we return from rpc_run_task */
+ rpc_sleep_on(&xprt->binding, task, NULL, NULL);
+
+ /* Someone else may have bound if we slept */
+ if (xprt_bound(xprt)) {
+ status = 0;
+ dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid);
+ goto bailout_nofree;
+ }
+
+ if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
+ xprt->bind_index = 0;
+ status = -EACCES; /* tell caller to try again later */
+ dprintk("RPC: %5u rpcb_getport no more getport versions "
+ "available\n", task->tk_pid);
+ goto bailout_nofree;
+ }
+ bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
+
+ dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n",
+ task->tk_pid, bind_version);
+
+ map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
+ if (!map) {
+ status = -ENOMEM;
+ dprintk("RPC: %5u rpcb_getport no memory available\n",
+ task->tk_pid);
+ goto bailout_nofree;
+ }
+ map->r_prog = clnt->cl_prog;
+ map->r_vers = clnt->cl_vers;
+ map->r_prot = xprt->prot;
+ map->r_port = 0;
+ map->r_xprt = xprt_get(xprt);
+ map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP :
+ RPCB_NETID_UDP;
+ /* NOTE(review): copies sizeof(r_addr) bytes whatever the string's
+ * length; confirm the source buffer is at least that large */
+ memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR),
+ sizeof(map->r_addr));
+ map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
+
+ rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
+ rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
+ if (IS_ERR(rpcb_clnt)) {
+ status = PTR_ERR(rpcb_clnt);
+ dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n",
+ task->tk_pid, PTR_ERR(rpcb_clnt));
+ goto bailout;
+ }
+
+ child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
+ if (IS_ERR(child)) {
+ status = -EIO;
+ dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n",
+ task->tk_pid);
+ goto bailout_nofree;
+ }
+ rpc_put_task(child);
+
+ task->tk_xprt->stat.bind_count++;
+ return;
+
+bailout:
+ /* Undo the allocation and the xprt reference taken for map->r_xprt */
+ kfree(map);
+ xprt_put(xprt);
+bailout_nofree:
+ rpcb_wake_rpcbind_waiters(xprt, status);
+bailout_nowake:
+ task->tk_status = status;
+}
+
+/*
+ * Rpcbind child task calls this callback via tk_exit.
+ */
+static void rpcb_getport_done(struct rpc_task *child, void *data)
+{
+ struct rpcbind_args *map = data;
+ struct rpc_xprt *xprt = map->r_xprt;
+ int status = child->tk_status;
+
+ /* rpcbind server doesn't support this rpcbind protocol version */
+ if (status == -EPROTONOSUPPORT)
+ xprt->bind_index++;
+
+ if (status < 0) {
+ /* rpcbind server not available on remote host? */
+ xprt->ops->set_port(xprt, 0);
+ } else if (map->r_port == 0) {
+ /* Requested RPC service wasn't registered on remote host */
+ xprt->ops->set_port(xprt, 0);
+ status = -EACCES;
+ } else {
+ /* Succeeded */
+ xprt->ops->set_port(xprt, map->r_port);
+ xprt_set_bound(xprt);
+ status = 0;
+ }
+
+ dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n",
+ child->tk_pid, status, map->r_port);
+
+ rpcb_wake_rpcbind_waiters(xprt, status);
+}
+
+static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
+ struct rpcbind_args *rpcb)
+{
+ dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n",
+ rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
+ *p++ = htonl(rpcb->r_prog);
+ *p++ = htonl(rpcb->r_vers);
+ *p++ = htonl(rpcb->r_prot);
+ *p++ = htonl(rpcb->r_port);
+
+ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ return 0;
+}
+
+static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
+ unsigned short *portp)
+{
+ *portp = (unsigned short) ntohl(*p++);
+ dprintk("RPC: rpcb_decode_getport result %u\n",
+ *portp);
+ return 0;
+}
+
+static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
+ unsigned int *boolp)
+{
+ *boolp = (unsigned int) ntohl(*p++);
+ dprintk("RPC: rpcb_decode_set result %u\n",
+ *boolp);
+ return 0;
+}
+
+static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p,
+ struct rpcbind_args *rpcb)
+{
+ dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n",
+ rpcb->r_prog, rpcb->r_vers, rpcb->r_addr);
+ *p++ = htonl(rpcb->r_prog);
+ *p++ = htonl(rpcb->r_vers);
+
+ p = xdr_encode_string(p, rpcb->r_netid);
+ p = xdr_encode_string(p, rpcb->r_addr);
+ p = xdr_encode_string(p, rpcb->r_owner);
+
+ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+ return 0;
+}
+
+static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
+ unsigned short *portp)
+{
+ char *addr;
+ int addr_len, c, i, f, first, val;
+
+ *portp = 0;
+ addr_len = (unsigned int) ntohl(*p++);
+ if (addr_len > RPCB_MAXADDRLEN) /* sanity */
+ return -EINVAL;
+
+ dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n",
+ (char *) p);
+
+ addr = (char *)p;
+ val = 0;
+ first = 1;
+ f = 1;
+ for (i = addr_len - 1; i > 0; i--) {
+ c = addr[i];
+ if (c >= '0' && c <= '9') {
+ val += (c - '0') * f;
+ f *= 10;
+ } else if (c == '.') {
+ if (first) {
+ *portp = val;
+ val = first = 0;
+ f = 1;
+ } else {
+ *portp |= (val << 8);
+ break;
+ }
+ }
+ }
+
+ dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp);
+ return 0;
+}
+
+#define RPCB_program_sz (1u)
+#define RPCB_version_sz (1u)
+#define RPCB_protocol_sz (1u)
+#define RPCB_port_sz (1u)
+#define RPCB_boolean_sz (1u)
+
+#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN))
+#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN))
+#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
+
+#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \
+ RPCB_protocol_sz+RPCB_port_sz
+#define RPCB_getaddrargs_sz RPCB_program_sz+RPCB_version_sz+ \
+ RPCB_netid_sz+RPCB_addr_sz+ \
+ RPCB_ownerstring_sz
+
+#define RPCB_setres_sz RPCB_boolean_sz
+#define RPCB_getportres_sz RPCB_port_sz
+
+/*
+ * Note that RFC 1833 does not put any size restrictions on the
+ * address string returned by the remote rpcbind database.
+ */
+#define RPCB_getaddrres_sz RPCB_addr_sz
+
+#define PROC(proc, argtype, restype) \
+ [RPCBPROC_##proc] = { \
+ .p_proc = RPCBPROC_##proc, \
+ .p_encode = (kxdrproc_t) rpcb_encode_##argtype, \
+ .p_decode = (kxdrproc_t) rpcb_decode_##restype, \
+ .p_arglen = RPCB_##argtype##args_sz, \
+ .p_replen = RPCB_##restype##res_sz, \
+ .p_statidx = RPCBPROC_##proc, \
+ .p_timer = 0, \
+ .p_name = #proc, \
+ }
+
+/*
+ * Not all rpcbind procedures described in RFC 1833 are implemented
+ * since the Linux kernel RPC code requires only these.
+ */
+static struct rpc_procinfo rpcb_procedures2[] = {
+ PROC(SET, mapping, set),
+ PROC(UNSET, mapping, set),
+ PROC(GETADDR, mapping, getport),
+};
+
+static struct rpc_procinfo rpcb_procedures3[] = {
+ PROC(SET, mapping, set),
+ PROC(UNSET, mapping, set),
+ PROC(GETADDR, getaddr, getaddr),
+};
+
+static struct rpc_procinfo rpcb_procedures4[] = {
+ PROC(SET, mapping, set),
+ PROC(UNSET, mapping, set),
+ PROC(GETVERSADDR, getaddr, getaddr),
+};
+
+static struct rpcb_info rpcb_next_version[] = {
+#ifdef CONFIG_SUNRPC_BIND34
+ { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
+ { 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
+#endif
+ { 2, &rpcb_procedures2[RPCBPROC_GETPORT] },
+ { 0, NULL },
+};
+
+static struct rpc_version rpcb_version2 = {
+ .number = 2,
+ .nrprocs = RPCB_HIGHPROC_2,
+ .procs = rpcb_procedures2
+};
+
+static struct rpc_version rpcb_version3 = {
+ .number = 3,
+ .nrprocs = RPCB_HIGHPROC_3,
+ .procs = rpcb_procedures3
+};
+
+static struct rpc_version rpcb_version4 = {
+ .number = 4,
+ .nrprocs = RPCB_HIGHPROC_4,
+ .procs = rpcb_procedures4
+};
+
+static struct rpc_version *rpcb_version[] = {
+ NULL,
+ NULL,
+ &rpcb_version2,
+ &rpcb_version3,
+ &rpcb_version4
+};
+
+static struct rpc_stat rpcb_stats;
+
+struct rpc_program rpcb_program = {
+ .name = "rpcbind",
+ .number = RPCBIND_PROGRAM,
+ .nrvers = ARRAY_SIZE(rpcb_version),
+ .version = rpcb_version,
+ .stats = &rpcb_stats,
+};
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6d87320074b1..4a53e94f8134 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -741,50 +741,53 @@ static void rpc_async_schedule(struct work_struct *work)
* @task: RPC task that will use this buffer
* @size: requested byte size
*
- * We try to ensure that some NFS reads and writes can always proceed
- * by using a mempool when allocating 'small' buffers.
+ * To prevent rpciod from hanging, this allocator never sleeps,
+ * returning NULL if the request cannot be serviced immediately.
+ * The caller can arrange to sleep in a way that is safe for rpciod.
+ *
+ * Most requests are 'small' (under 2KiB) and can be serviced from a
+ * mempool, ensuring that NFS reads and writes can always proceed,
+ * and that there is good locality of reference for these buffers.
+ *
* In order to avoid memory starvation triggering more writebacks of
- * NFS requests, we use GFP_NOFS rather than GFP_KERNEL.
+ * NFS requests, we avoid using GFP_KERNEL.
*/
-void * rpc_malloc(struct rpc_task *task, size_t size)
+void *rpc_malloc(struct rpc_task *task, size_t size)
{
- struct rpc_rqst *req = task->tk_rqstp;
- gfp_t gfp;
+ size_t *buf;
+ gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
- if (task->tk_flags & RPC_TASK_SWAPPER)
- gfp = GFP_ATOMIC;
+ /* Reserve room for the size header stored in front of the payload */
+ size += sizeof(size_t);
+ if (size <= RPC_BUFFER_MAXSIZE)
+ buf = mempool_alloc(rpc_buffer_mempool, gfp);
else
- gfp = GFP_NOFS;
-
- if (size > RPC_BUFFER_MAXSIZE) {
- req->rq_buffer = kmalloc(size, gfp);
- if (req->rq_buffer)
- req->rq_bufsize = size;
- } else {
- req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp);
- if (req->rq_buffer)
- req->rq_bufsize = RPC_BUFFER_MAXSIZE;
- }
- return req->rq_buffer;
+ buf = kmalloc(size, gfp);
+ /* Allocation may fail; report that as NULL instead of writing the
+ * size header through a NULL pointer */
+ if (!buf)
+ return NULL;
+ *buf = size;
+ dprintk("RPC: %5u allocated buffer of size %zu at %p\n",
+ task->tk_pid, size, buf);
+ /* Hand back the address just past the size header */
+ return (void *) ++buf;
}
/**
* rpc_free - free buffer allocated via rpc_malloc
- * @task: RPC task with a buffer to be freed
+ * @buffer: buffer to free
*
*/
-void rpc_free(struct rpc_task *task)
+void rpc_free(void *buffer)
{
- struct rpc_rqst *req = task->tk_rqstp;
+ size_t size, *buf = (size_t *) buffer;
- if (req->rq_buffer) {
- if (req->rq_bufsize == RPC_BUFFER_MAXSIZE)
- mempool_free(req->rq_buffer, rpc_buffer_mempool);
- else
- kfree(req->rq_buffer);
- req->rq_buffer = NULL;
- req->rq_bufsize = 0;
- }
+ if (!buffer)
+ return;
+ /* rpc_malloc stored the size in the word before the payload: step
+ * back to that header first, then read the size from it */
+ buf--;
+ size = *buf;
+
+ dprintk("RPC: freeing buffer of size %zu at %p\n",
+ size, buf);
+ /* The recorded size tells which allocator the buffer came from */
+ if (size <= RPC_BUFFER_MAXSIZE)
+ mempool_free(buf, rpc_buffer_mempool);
+ else
+ kfree(buf);
}
/*
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 634885b0c04d..1d377d1ab7f4 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -154,7 +154,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
desc.offset = sizeof(struct udphdr);
desc.count = skb->len - desc.offset;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+ if (skb_csum_unnecessary(skb))
goto no_checksum;
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b4db53ff1435..b7503c103ae8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -757,7 +757,7 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
if (progp->pg_vers[i]->vs_hidden)
continue;
- error = rpc_register(progp->pg_prog, i, proto, port, &dummy);
+ error = rpcb_register(progp->pg_prog, i, proto, port, &dummy);
if (error < 0)
break;
if (port && !dummy) {
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 9bae4090254c..2bd23ea2aa8b 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -383,7 +383,10 @@ void svcauth_unix_purge(void)
static inline struct ip_map *
ip_map_cached_get(struct svc_rqst *rqstp)
{
- struct ip_map *ipm = rqstp->rq_sock->sk_info_authunix;
+ struct ip_map *ipm;
+ struct svc_sock *svsk = rqstp->rq_sock;
+ spin_lock_bh(&svsk->sk_defer_lock);
+ ipm = svsk->sk_info_authunix;
if (ipm != NULL) {
if (!cache_valid(&ipm->h)) {
/*
@@ -391,12 +394,14 @@ ip_map_cached_get(struct svc_rqst *rqstp)
* remembered, e.g. by a second mount from the
* same IP address.
*/
- rqstp->rq_sock->sk_info_authunix = NULL;
+ svsk->sk_info_authunix = NULL;
+ spin_unlock_bh(&svsk->sk_defer_lock);
cache_put(&ipm->h, &ip_map_cache);
return NULL;
}
cache_get(&ipm->h);
}
+ spin_unlock_bh(&svsk->sk_defer_lock);
return ipm;
}
@@ -405,9 +410,15 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
{
struct svc_sock *svsk = rqstp->rq_sock;
- if (svsk->sk_sock->type == SOCK_STREAM && svsk->sk_info_authunix == NULL)
- svsk->sk_info_authunix = ipm; /* newly cached, keep the reference */
- else
+ spin_lock_bh(&svsk->sk_defer_lock);
+ if (svsk->sk_sock->type == SOCK_STREAM &&
+ svsk->sk_info_authunix == NULL) {
+ /* newly cached, keep the reference */
+ svsk->sk_info_authunix = ipm;
+ ipm = NULL;
+ }
+ spin_unlock_bh(&svsk->sk_defer_lock);
+ if (ipm)
cache_put(&ipm->h, &ip_map_cache);
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f6e1eb1ea720..22f61aee4824 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -452,6 +452,8 @@ union svc_pktinfo_u {
struct in_pktinfo pkti;
struct in6_pktinfo pkti6;
};
+#define SVC_PKTINFO_SPACE \
+ CMSG_SPACE(sizeof(union svc_pktinfo_u))
static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
{
@@ -491,8 +493,11 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
struct svc_sock *svsk = rqstp->rq_sock;
struct socket *sock = svsk->sk_sock;
int slen;
- char buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
- struct cmsghdr *cmh = (struct cmsghdr *)buffer;
+ union {
+ struct cmsghdr hdr;
+ long all[SVC_PKTINFO_SPACE / sizeof(long)];
+ } buffer;
+ struct cmsghdr *cmh = &buffer.hdr;
int len = 0;
int result;
int size;
@@ -745,8 +750,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct sk_buff *skb;
- char buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
- struct cmsghdr *cmh = (struct cmsghdr *)buffer;
+ union {
+ struct cmsghdr hdr;
+ long all[SVC_PKTINFO_SPACE / sizeof(long)];
+ } buffer;
+ struct cmsghdr *cmh = &buffer.hdr;
int err, len;
struct msghdr msg = {
.msg_name = svc_addr(rqstp),
@@ -779,8 +787,8 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
}
clear_bit(SK_DATA, &svsk->sk_flags);
- while ((err == kernel_recvmsg(svsk->sk_sock, &msg, NULL,
- 0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 ||
+ while ((err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
+ 0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 ||
(skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
if (err == -EAGAIN) {
svc_sock_received(svsk);
@@ -790,16 +798,12 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
dprintk("svc: recvfrom returned error %d\n", -err);
}
rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
- if (skb->tstamp.off_sec == 0) {
- struct timeval tv;
-
- tv.tv_sec = xtime.tv_sec;
- tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
- skb_set_timestamp(skb, &tv);
+ if (skb->tstamp.tv64 == 0) {
+ skb->tstamp = ktime_get_real();
/* Don't enable netstamp, sunrpc doesn't
need that much accuracy */
}
- skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
+ svsk->sk_sk->sk_stamp = skb->tstamp;
set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
/*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ee6ffa01dfb1..5b05b73e4c1d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -735,16 +735,6 @@ void xprt_transmit(struct rpc_task *task)
xprt_reset_majortimeo(req);
/* Turn off autodisconnect */
del_singleshot_timer_sync(&xprt->timer);
- } else {
- /* If all request bytes have been sent,
- * then we must be retransmitting this one */
- if (!req->rq_bytes_sent) {
- if (task->tk_client->cl_discrtry) {
- xprt_disconnect(xprt);
- task->tk_status = -ENOTCONN;
- return;
- }
- }
}
} else if (!req->rq_bytes_sent)
return;
@@ -833,7 +823,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_task = task;
req->rq_xprt = xprt;
req->rq_buffer = NULL;
- req->rq_bufsize = 0;
req->rq_xid = xprt_alloc_xid(xprt);
req->rq_release_snd_buf = NULL;
xprt_reset_majortimeo(req);
@@ -865,7 +854,7 @@ void xprt_release(struct rpc_task *task)
mod_timer(&xprt->timer,
xprt->last_used + xprt->idle_timeout);
spin_unlock_bh(&xprt->transport_lock);
- xprt->ops->buf_free(task);
+ xprt->ops->buf_free(req->rq_buffer);
task->tk_rqstp = NULL;
if (req->rq_release_snd_buf)
req->rq_release_snd_buf(req);
@@ -938,6 +927,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si
xprt->timer.data = (unsigned long) xprt;
xprt->last_used = jiffies;
xprt->cwnd = RPC_INITCWND;
+ xprt->bind_index = 0;
rpc_init_wait_queue(&xprt->binding, "xprt_binding");
rpc_init_wait_queue(&xprt->pending, "xprt_pending");
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a5a32029e728..cc33c5880abb 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1476,7 +1476,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
- .rpcbind = rpc_getport,
+ .rpcbind = rpcb_getport,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
@@ -1493,7 +1493,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
static struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
- .rpcbind = rpc_getport,
+ .rpcbind = rpcb_getport,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 3891cc00087d..f9e367d946eb 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -18,7 +18,7 @@ config TIPC
This protocol support is also available as a module ( = code which
can be inserted in and removed from the running kernel whenever you
want). The module will be called tipc. If you want to compile it
- as a module, say M here and read <file:Documentation/modules.txt>.
+ as a module, say M here and read <file:Documentation/kbuild/modules.txt>.
If in doubt, say N.
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 14789a82de53..c71337a22d33 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -89,7 +89,7 @@ struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
void *tlv_data, int tlv_data_size)
{
- struct tlv_desc *tlv = (struct tlv_desc *)buf->tail;
+ struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf);
int new_tlv_space = TLV_SPACE(tlv_data_size);
if (skb_tailroom(buf) < new_tlv_space) {
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 9be4839e32c5..0ee6ded18f3a 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -73,7 +73,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
clone = skb_clone(buf, GFP_ATOMIC);
if (clone) {
- clone->nh.raw = clone->data;
+ skb_reset_network_header(clone);
dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
clone->dev = dev;
dev->hard_header(clone, dev, ETH_P_TIPC,
@@ -99,8 +99,8 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
if (likely(eb_ptr->bearer)) {
if (likely(!dev->promiscuity) ||
- !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) ||
- !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) {
+ !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) ||
+ !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) {
size = msg_size((struct tipc_msg *)buf->data);
skb_trim(buf, size);
if (likely(buf->len == size)) {
@@ -120,16 +120,18 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
static int enable_bearer(struct tipc_bearer *tb_ptr)
{
- struct net_device *dev = dev_base;
+ struct net_device *dev, *pdev;
struct eth_bearer *eb_ptr = &eth_bearers[0];
struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
/* Find device with specified name */
-
- while (dev && dev->name && strncmp(dev->name, driver_name, IFNAMSIZ)) {
- dev = dev->next;
- }
+ dev = NULL;
+ for_each_netdev(pdev)
+ if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
+ dev = pdev;
+ break;
+ }
if (!dev)
return -ENODEV;
@@ -140,7 +142,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
return -EDQUOT;
if (!eb_ptr->dev) {
eb_ptr->dev = dev;
- eb_ptr->tipc_packet_type.type = __constant_htons(ETH_P_TIPC);
+ eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
eb_ptr->tipc_packet_type.dev = dev;
eb_ptr->tipc_packet_type.func = recv_msg;
eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 71c2f2fd405c..2124f32ef29f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1001,7 +1001,7 @@ static int link_bundle_buf(struct link *l_ptr,
return 0;
skb_put(bundler, pad + size);
- memcpy(bundler->data + to_pos, buf->data, size);
+ skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size);
msg_set_size(bundler_msg, to_pos + size);
msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
dbg("Packed msg # %u(%u octets) into pos %u in buf(#%u)\n",
@@ -1109,8 +1109,8 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
if (bundler) {
msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
TIPC_OK, INT_H_SIZE, l_ptr->addr);
- memcpy(bundler->data, (unchar *)&bundler_hdr,
- INT_H_SIZE);
+ skb_copy_to_linear_data(bundler, &bundler_hdr,
+ INT_H_SIZE);
skb_trim(bundler, INT_H_SIZE);
link_bundle_buf(l_ptr, bundler, buf);
buf = bundler;
@@ -1383,9 +1383,9 @@ again:
if (!buf)
return -ENOMEM;
buf->next = NULL;
- memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
hsz = msg_hdr_sz(hdr);
- memcpy(buf->data + INT_H_SIZE, (unchar *)hdr, hsz);
+ skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz);
msg_dbg(buf_msg(buf), ">BUILD>");
/* Chop up message: */
@@ -1416,8 +1416,8 @@ error:
return -EFAULT;
}
} else
- memcpy(buf->data + fragm_crs, sect_crs, sz);
-
+ skb_copy_to_linear_data_offset(buf, fragm_crs,
+ sect_crs, sz);
sect_crs += sz;
sect_rest -= sz;
fragm_crs += sz;
@@ -1442,7 +1442,7 @@ error:
buf->next = NULL;
prev->next = buf;
- memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
fragm_crs = INT_H_SIZE;
fragm_rest = fragm_sz;
msg_dbg(buf_msg(buf)," >BUILD>");
@@ -2130,7 +2130,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
buf = l_ptr->proto_msg_queue;
if (!buf)
return;
- memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+ skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
return;
}
msg_set_timestamp(msg, jiffies_to_msecs(jiffies));
@@ -2143,7 +2143,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
if (!buf)
return;
- memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+ skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
msg_set_size(buf_msg(buf), msg_size);
if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
@@ -2319,8 +2319,8 @@ void tipc_link_tunnel(struct link *l_ptr,
"unable to send tunnel msg\n");
return;
}
- memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE);
- memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length);
+ skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length);
dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane);
msg_dbg(buf_msg(buf), ">SEND>");
tipc_link_send_buf(tunnel, buf);
@@ -2361,7 +2361,7 @@ void tipc_link_changeover(struct link *l_ptr)
buf = buf_acquire(INT_H_SIZE);
if (buf) {
- memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
msg_set_size(&tunnel_hdr, INT_H_SIZE);
dbg("%c->%c:", l_ptr->b_ptr->net_plane,
tunnel->b_ptr->net_plane);
@@ -2426,8 +2426,9 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
"unable to send duplicate msg\n");
return;
}
- memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
- memcpy(outbuf->data + INT_H_SIZE, iter->data, length);
+ skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data,
+ length);
dbg("%c->%c:", l_ptr->b_ptr->net_plane,
tunnel->b_ptr->net_plane);
msg_dbg(buf_msg(outbuf), ">SEND>");
@@ -2457,7 +2458,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
eb = buf_acquire(size);
if (eb)
- memcpy(eb->data, (unchar *)msg, size);
+ skb_copy_to_linear_data(eb, msg, size);
return eb;
}
@@ -2569,7 +2570,7 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
if (obuf == NULL) {
warn("Link unable to unbundle message(s)\n");
break;
- };
+ }
pos += align(msg_size(buf_msg(obuf)));
msg_dbg(buf_msg(obuf), " /");
tipc_net_route_msg(obuf);
@@ -2631,9 +2632,9 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
goto exit;
}
msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
- memcpy(fragm->data, (unchar *)&fragm_hdr, INT_H_SIZE);
- memcpy(fragm->data + INT_H_SIZE, crs, fragm_sz);
-
+ skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs,
+ fragm_sz);
/* Send queued messages first, if any: */
l_ptr->stats.sent_fragments++;
@@ -2733,8 +2734,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
if (pbuf != NULL) {
pbuf->next = *pending;
*pending = pbuf;
- memcpy(pbuf->data, (unchar *)imsg, msg_data_sz(fragm));
-
+ skb_copy_to_linear_data(pbuf, imsg,
+ msg_data_sz(fragm));
/* Prepare buffer for subsequent fragments. */
set_long_msg_seqno(pbuf, long_msg_seq_no);
@@ -2750,7 +2751,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
u32 fsz = get_fragm_size(pbuf);
u32 crs = ((msg_fragm_no(fragm) - 1) * fsz);
u32 exp_frags = get_expected_frags(pbuf) - 1;
- memcpy(pbuf->data + crs, msg_data(fragm), dsz);
+ skb_copy_to_linear_data_offset(pbuf, crs,
+ msg_data(fragm), dsz);
buf_discard(fbuf);
/* Is message complete? */
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 62d549063604..35d5ba1d4f42 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,8 +1,8 @@
/*
* net/tipc/msg.h: Include file for TIPC message header routines
*
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2000-2007, Ericsson AB
+ * Copyright (c) 2005-2007, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -71,8 +71,11 @@ static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val)
static inline void msg_set_bits(struct tipc_msg *m, u32 w,
u32 pos, u32 mask, u32 val)
{
- u32 word = msg_word(m,w) & ~(mask << pos);
- msg_set_word(m, w, (word |= (val << pos)));
+ val = (val & mask) << pos;
+ val = htonl(val);
+ mask = htonl(mask << pos);
+ m->hdr[w] &= ~mask;
+ m->hdr[w] |= val;
}
/*
@@ -786,15 +789,16 @@ static inline int msg_build(struct tipc_msg *hdr,
*buf = buf_acquire(sz);
if (!(*buf))
return -ENOMEM;
- memcpy((*buf)->data, (unchar *)hdr, hsz);
+ skb_copy_to_linear_data(*buf, hdr, hsz);
for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
if (likely(usrmem))
res = !copy_from_user((*buf)->data + pos,
msg_sect[cnt].iov_base,
msg_sect[cnt].iov_len);
else
- memcpy((*buf)->data + pos, msg_sect[cnt].iov_base,
- msg_sect[cnt].iov_len);
+ skb_copy_to_linear_data_offset(*buf, pos,
+ msg_sect[cnt].iov_base,
+ msg_sect[cnt].iov_len);
pos += msg_sect[cnt].iov_len;
}
if (likely(res))
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index b8e1edc2badc..4cdafa2d1d4d 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -57,7 +57,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
if (rep_buf) {
skb_push(rep_buf, hdr_space);
- rep_nlh = (struct nlmsghdr *)rep_buf->data;
+ rep_nlh = nlmsg_hdr(rep_buf);
memcpy(rep_nlh, req_nlh, hdr_space);
rep_nlh->nlmsg_len = rep_buf->len;
genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 5f8217d4b452..bcd5da00737b 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -464,7 +464,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
msg_set_size(rmsg, data_sz + hdr_sz);
msg_set_nametype(rmsg, msg_nametype(msg));
msg_set_nameinst(rmsg, msg_nameinst(msg));
- memcpy(rbuf->data + hdr_sz, msg_data(msg), data_sz);
+ skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz);
/* send self-abort message when rejecting on a connected port */
if (msg_connected(msg)) {
@@ -1419,7 +1419,7 @@ int tipc_send_buf(u32 ref, struct sk_buff *buf, unsigned int dsz)
return -ENOMEM;
skb_push(buf, hsz);
- memcpy(buf->data, (unchar *)msg, hsz);
+ skb_copy_to_linear_data(buf, msg, hsz);
destnode = msg_destnode(msg);
p_ptr->publ.congested = 1;
if (!tipc_port_congested(p_ptr)) {
@@ -1555,7 +1555,7 @@ int tipc_forward_buf2name(u32 ref,
if (skb_cow(buf, LONG_H_SIZE))
return -ENOMEM;
skb_push(buf, LONG_H_SIZE);
- memcpy(buf->data, (unchar *)msg, LONG_H_SIZE);
+ skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
msg_dbg(buf_msg(buf),"PREP:");
if (likely(destport || destnode)) {
p_ptr->sent++;
@@ -1679,7 +1679,7 @@ int tipc_forward_buf2port(u32 ref,
return -ENOMEM;
skb_push(buf, DIR_MSG_H_SIZE);
- memcpy(buf->data, (unchar *)msg, DIR_MSG_H_SIZE);
+ skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE);
msg_dbg(msg, "buf2port: ");
p_ptr->sent++;
if (dest->node == tipc_own_addr)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b71739fbe2c6..45832fb75ea4 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1020,7 +1020,7 @@ restart:
if (!err) {
buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);
- sz = buf->tail - buf_crs;
+ sz = skb_tail_pointer(buf) - buf_crs;
needed = (buf_len - sz_copied);
sz_to_copy = (sz <= needed) ? sz : needed;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 606971645b33..fc12ba51c1fc 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -111,7 +111,6 @@
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
-#include <linux/smp_lock.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
@@ -1319,7 +1318,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
unix_attach_fds(siocb->scm, skb);
unix_get_secdata(siocb->scm, skb);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
if (err)
goto out_free;
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c
deleted file mode 100644
index 41d7e32be70d..000000000000
--- a/net/wanrouter/af_wanpipe.c
+++ /dev/null
@@ -1,2600 +0,0 @@
-/*****************************************************************************
-* af_wanpipe.c WANPIPE(tm) Secure Socket Layer.
-*
-* Author: Nenad Corbic <ncorbic@sangoma.com>
-*
-* Copyright: (c) 2000 Sangoma Technologies Inc.
-*
-* This program is free software; you can redistribute it and/or
-* modify it under the terms of the GNU General Public License
-* as published by the Free Software Foundation; either version
-* 2 of the License, or (at your option) any later version.
-* ============================================================================
-* Due Credit:
-* Wanpipe socket layer is based on Packet and
-* the X25 socket layers. The above sockets were
-* used for the specific use of Sangoma Technologies
-* API programs.
-* Packet socket Authors: Ross Biro, Fred N. van Kempen and
-* Alan Cox.
-* X25 socket Author: Jonathan Naylor.
-* ============================================================================
-* Mar 15, 2002 Arnaldo C. Melo o Use wp_sk()->num, as it isnt anymore in sock
-* Apr 25, 2000 Nenad Corbic o Added the ability to send zero length packets.
-* Mar 13, 2000 Nenad Corbic o Added a tx buffer check via ioctl call.
-* Mar 06, 2000 Nenad Corbic o Fixed the corrupt sock lcn problem.
-* Server and client application can run
-* simultaneously without conflicts.
-* Feb 29, 2000 Nenad Corbic o Added support for PVC protocols, such as
-* CHDLC, Frame Relay and HDLC API.
-* Jan 17, 2000 Nenad Corbic o Initial version, based on AF_PACKET socket.
-* X25API support only.
-*
-******************************************************************************/
-
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/capability.h>
-#include <linux/fcntl.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/poll.h>
-#include <linux/wireless.h>
-#include <linux/kmod.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/if_wanpipe.h>
-#include <linux/pkt_sched.h>
-#include <linux/tcp_states.h>
-#include <linux/if_wanpipe_common.h>
-
-#ifdef CONFIG_INET
-#include <net/inet_common.h>
-#endif
-
-#define SLOW_BACKOFF 0.1*HZ
-#define FAST_BACKOFF 0.01*HZ
-
-//#define PRINT_DEBUG
-#ifdef PRINT_DEBUG
- #define DBG_PRINTK(format, a...) printk(format, ## a)
-#else
- #define DBG_PRINTK(format, a...)
-#endif
-
-
-/* SECURE SOCKET IMPLEMENTATION
- *
- * TRANSMIT:
- *
- * When the user sends a packet via send() system call
- * the wanpipe_sendmsg() function is executed.
- *
- * Each packet is enqueud into sk->sk_write_queue transmit
- * queue. When the packet is enqueued, a delayed transmit
- * timer is triggerd which acts as a Bottom Half hander.
- *
- * wanpipe_delay_transmit() function (BH), dequeues packets
- * from the sk->sk_write_queue transmit queue and sends it
- * to the deriver via dev->hard_start_xmit(skb, dev) function.
- * Note, this function is actual a function pointer of if_send()
- * routine in the wanpipe driver.
- *
- * X25API GUARANTEED DELIVERY:
- *
- * In order to provide 100% guaranteed packet delivery,
- * an atomic 'packet_sent' counter is implemented. Counter
- * is incremented for each packet enqueued
- * into sk->sk_write_queue. Counter is decremented each
- * time wanpipe_delayed_transmit() function successfuly
- * passes the packet to the driver. Before each send(), a poll
- * routine checks the sock resources The maximum value of
- * packet sent counter is 1, thus if one packet is queued, the
- * application will block until that packet is passed to the
- * driver.
- *
- * RECEIVE:
- *
- * Wanpipe device drivers call the socket bottom half
- * function, wanpipe_rcv() to queue the incoming packets
- * into an AF_WANPIPE socket queue. Based on wanpipe_rcv()
- * return code, the driver knows whether the packet was
- * successfully queued. If the socket queue is full,
- * protocol flow control is used by the driver, if any,
- * to slow down the traffic until the sock queue is free.
- *
- * Every time a packet arrives into a socket queue the
- * socket wakes up processes which are waiting to receive
- * data.
- *
- * If the socket queue is full, the driver sets a block
- * bit which signals the socket to kick the wanpipe driver
- * bottom half hander when the socket queue is partialy
- * empty. wanpipe_recvmsg() function performs this action.
- *
- * In case of x25api, packets will never be dropped, since
- * flow control is available.
- *
- * In case of streaming protocols like CHDLC, packets will
- * be dropped but the statistics will be generated.
- */
-
-
-/* The code below is used to test memory leaks. It prints out
- * a message every time kmalloc and kfree system calls get executed.
- * If the calls match there is no leak :)
- */
-
-/***********FOR DEBUGGING PURPOSES*********************************************
-#define KMEM_SAFETYZONE 8
-
-static void * dbg_kmalloc(unsigned int size, int prio, int line) {
- void * v = kmalloc(size,prio);
- printk(KERN_INFO "line %d kmalloc(%d,%d) = %p\n",line,size,prio,v);
- return v;
-}
-static void dbg_kfree(void * v, int line) {
- printk(KERN_INFO "line %d kfree(%p)\n",line,v);
- kfree(v);
-}
-
-#define kmalloc(x,y) dbg_kmalloc(x,y,__LINE__)
-#define kfree(x) dbg_kfree(x,__LINE__)
-******************************************************************************/
-
-
-/* List of all wanpipe sockets. */
-HLIST_HEAD(wanpipe_sklist);
-static DEFINE_RWLOCK(wanpipe_sklist_lock);
-
-atomic_t wanpipe_socks_nr;
-static unsigned long wanpipe_tx_critical;
-
-#if 0
-/* Private wanpipe socket structures. */
-struct wanpipe_opt
-{
- void *mbox; /* Mail box */
- void *card; /* Card bouded to */
- struct net_device *dev; /* Bounded device */
- unsigned short lcn; /* Binded LCN */
- unsigned char svc; /* 0=pvc, 1=svc */
- unsigned char timer; /* flag for delayed transmit*/
- struct timer_list tx_timer;
- unsigned poll_cnt;
- unsigned char force; /* Used to force sock release */
- atomic_t packet_sent;
-};
-#endif
-
-static int sk_count;
-extern const struct proto_ops wanpipe_ops;
-static unsigned long find_free_critical;
-
-static void wanpipe_unlink_driver(struct sock *sk);
-static void wanpipe_link_driver(struct net_device *dev, struct sock *sk);
-static void wanpipe_wakeup_driver(struct sock *sk);
-static int execute_command(struct sock *, unsigned char, unsigned int);
-static int check_dev(struct net_device *dev, sdla_t *card);
-struct net_device *wanpipe_find_free_dev(sdla_t *card);
-static void wanpipe_unlink_card (struct sock *);
-static int wanpipe_link_card (struct sock *);
-static struct sock *wanpipe_make_new(struct sock *);
-static struct sock *wanpipe_alloc_socket(void);
-static inline int get_atomic_device(struct net_device *dev);
-static int wanpipe_exec_cmd(struct sock *, int, unsigned int);
-static int get_ioctl_cmd (struct sock *, void *);
-static int set_ioctl_cmd (struct sock *, void *);
-static void release_device(struct net_device *dev);
-static void wanpipe_kill_sock_timer (unsigned long data);
-static void wanpipe_kill_sock_irq (struct sock *);
-static void wanpipe_kill_sock_accept (struct sock *);
-static int wanpipe_do_bind(struct sock *sk, struct net_device *dev,
- int protocol);
-struct sock * get_newsk_from_skb (struct sk_buff *);
-static int wanpipe_debug (struct sock *, void *);
-static void wanpipe_delayed_transmit (unsigned long data);
-static void release_driver(struct sock *);
-static void start_cleanup_timer (struct sock *);
-static void check_write_queue(struct sock *);
-static int check_driver_busy (struct sock *);
-
-/*============================================================
- * wanpipe_rcv
- *
- * Wanpipe socket bottom half handler. This function
- * is called by the WANPIPE device drivers to queue a
- * incoming packet into the socket receive queue.
- * Once the packet is queued, all processes waiting to
- * read are woken up.
- *
- * During socket bind, this function is bounded into
- * WANPIPE driver private.
- *===========================================================*/
-
-static int wanpipe_rcv(struct sk_buff *skb, struct net_device *dev,
- struct sock *sk)
-{
- struct wan_sockaddr_ll *sll = (struct wan_sockaddr_ll*)skb->cb;
- wanpipe_common_t *chan = dev->priv;
- /*
- * When we registered the protocol we saved the socket in the data
- * field for just this event.
- */
-
- skb->dev = dev;
-
- sll->sll_family = AF_WANPIPE;
- sll->sll_hatype = dev->type;
- sll->sll_protocol = skb->protocol;
- sll->sll_pkttype = skb->pkt_type;
- sll->sll_ifindex = dev->ifindex;
- sll->sll_halen = 0;
-
- if (dev->hard_header_parse)
- sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
-
- /*
- * WAN_PACKET_DATA : Data which should be passed up the receive queue.
- * WAN_PACKET_ASYC : Asynchronous data like place call, which should
- * be passed up the listening sock.
- * WAN_PACKET_ERR : Asynchronous data like clear call or restart
- * which should go into an error queue.
- */
- switch (skb->pkt_type){
-
- case WAN_PACKET_DATA:
- if (sock_queue_rcv_skb(sk,skb)<0){
- return -ENOMEM;
- }
- break;
- case WAN_PACKET_CMD:
- sk->sk_state = chan->state;
- /* Bug fix: update Mar6.
- * Do not set the sock lcn number here, since
- * cmd is not guaranteed to be executed on the
- * board, thus Lcn could be wrong */
- sk->sk_data_ready(sk, skb->len);
- kfree_skb(skb);
- break;
- case WAN_PACKET_ERR:
- sk->sk_state = chan->state;
- if (sock_queue_err_skb(sk,skb)<0){
- return -ENOMEM;
- }
- break;
- default:
- printk(KERN_INFO "wansock: BH Illegal Packet Type Dropping\n");
- kfree_skb(skb);
- break;
- }
-
-//??????????????????????
-// if (sk->sk_state == WANSOCK_DISCONNECTED){
-// if (sk->sk_zapped) {
-// //printk(KERN_INFO "wansock: Disconnected, killing early\n");
-// wanpipe_unlink_driver(sk);
-// sk->sk_bound_dev_if = 0;
-// }
-// }
-
- return 0;
-}
-
-/*============================================================
- * wanpipe_listen_rcv
- *
- * Wanpipe LISTEN socket bottom half handler. This function
- * is called by the WANPIPE device drivers to queue an
- * incoming call into the socket listening queue.
- * Once the packet is queued, the waiting accept() process
- * is woken up.
- *
- * During socket bind, this function is bounded into
- * WANPIPE driver private.
- *
- * IMPORTANT NOTE:
- * The accept call() is waiting for an skb packet
- * which contains a pointer to a device structure.
- *
- * When we do a bind to a device structre, we
- * bind a newly created socket into "chan->sk". Thus,
- * when accept receives the skb packet, it will know
- * from which dev it came form, and in turn it will know
- * the address of the new sock.
- *
- * NOTE: This function gets called from driver ISR.
- *===========================================================*/
-
-static int wanpipe_listen_rcv (struct sk_buff *skb, struct sock *sk)
-{
- wanpipe_opt *wp = wp_sk(sk), *newwp;
- struct wan_sockaddr_ll *sll = (struct wan_sockaddr_ll*)skb->cb;
- struct sock *newsk;
- struct net_device *dev;
- sdla_t *card;
- mbox_cmd_t *mbox_ptr;
- wanpipe_common_t *chan;
-
- /* Find a free device, if none found, all svc's are busy
- */
-
- card = (sdla_t*)wp->card;
- if (!card){
- printk(KERN_INFO "wansock: LISTEN ERROR, No Card\n");
- return -ENODEV;
- }
-
- dev = wanpipe_find_free_dev(card);
- if (!dev){
- printk(KERN_INFO "wansock: LISTEN ERROR, No Free Device\n");
- return -ENODEV;
- }
-
- chan=dev->priv;
- chan->state = WANSOCK_CONNECTING;
-
- /* Allocate a new sock, which accept will bind
- * and pass up to the user
- */
- if ((newsk = wanpipe_make_new(sk)) == NULL){
- release_device(dev);
- return -ENOMEM;
- }
-
-
- /* Initialize the new sock structure
- */
- newsk->sk_bound_dev_if = dev->ifindex;
- newwp = wp_sk(newsk);
- newwp->card = wp->card;
-
- /* Insert the sock into the main wanpipe
- * sock list.
- */
- atomic_inc(&wanpipe_socks_nr);
-
- /* Allocate and fill in the new Mail Box. Then
- * bind the mail box to the sock. It will be
- * used by the ioctl call to read call information
- * and to execute commands.
- */
- if ((mbox_ptr = kzalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) {
- wanpipe_kill_sock_irq (newsk);
- release_device(dev);
- return -ENOMEM;
- }
- memcpy(mbox_ptr,skb->data,skb->len);
-
- /* Register the lcn on which incoming call came
- * from. Thus, if we have to clear it, we know
- * which lcn to clear
- */
-
- newwp->lcn = mbox_ptr->cmd.lcn;
- newwp->mbox = (void *)mbox_ptr;
-
- DBG_PRINTK(KERN_INFO "NEWSOCK : Device %s, bind to lcn %i\n",
- dev->name,mbox_ptr->cmd.lcn);
-
- chan->lcn = mbox_ptr->cmd.lcn;
- card->u.x.svc_to_dev_map[(chan->lcn%MAX_X25_LCN)] = dev;
-
- sock_reset_flag(newsk, SOCK_ZAPPED);
- newwp->num = htons(X25_PROT);
-
- if (wanpipe_do_bind(newsk, dev, newwp->num)) {
- wanpipe_kill_sock_irq (newsk);
- release_device(dev);
- return -EINVAL;
- }
- newsk->sk_state = WANSOCK_CONNECTING;
-
-
- /* Fill in the standard sock address info */
-
- sll->sll_family = AF_WANPIPE;
- sll->sll_hatype = dev->type;
- sll->sll_protocol = skb->protocol;
- sll->sll_pkttype = skb->pkt_type;
- sll->sll_ifindex = dev->ifindex;
- sll->sll_halen = 0;
-
- skb->dev = dev;
- sk->sk_ack_backlog++;
-
- /* We must do this manually, since the sock_queue_rcv_skb()
- * function sets the skb->dev to NULL. However, we use
- * the dev field in the accept function.*/
- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned)sk->sk_rcvbuf) {
-
- wanpipe_unlink_driver(newsk);
- wanpipe_kill_sock_irq (newsk);
- --sk->sk_ack_backlog;
- return -ENOMEM;
- }
-
- skb_set_owner_r(skb, sk);
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
-
- return 0;
-}
-
-
-
-/*============================================================
- * wanpipe_make_new
- *
- * Create a new sock, and allocate a wanpipe private
- * structure to it. Also, copy the important data
- * from the original sock to the new sock.
- *
- * This function is used by wanpipe_listen_rcv() listen
- * bottom half handler. A copy of the listening sock
- * is created using this function.
- *
- *===========================================================*/
-
-static struct sock *wanpipe_make_new(struct sock *osk)
-{
- struct sock *sk;
-
- if (osk->sk_type != SOCK_RAW)
- return NULL;
-
- if ((sk = wanpipe_alloc_socket()) == NULL)
- return NULL;
-
- sk->sk_type = osk->sk_type;
- sk->sk_socket = osk->sk_socket;
- sk->sk_priority = osk->sk_priority;
- sk->sk_protocol = osk->sk_protocol;
- wp_sk(sk)->num = wp_sk(osk)->num;
- sk->sk_rcvbuf = osk->sk_rcvbuf;
- sk->sk_sndbuf = osk->sk_sndbuf;
- sk->sk_state = WANSOCK_CONNECTING;
- sk->sk_sleep = osk->sk_sleep;
-
- if (sock_flag(osk, SOCK_DBG))
- sock_set_flag(sk, SOCK_DBG);
-
- return sk;
-}
-
-/*
- * FIXME: wanpipe_opt has to include a sock in its definition and stop using
- * sk_protinfo, but this code is not even compilable now, so lets leave it for
- * later.
- */
-static struct proto wanpipe_proto = {
- .name = "WANPIPE",
- .owner = THIS_MODULE,
- .obj_size = sizeof(struct sock),
-};
-
-/*============================================================
- * wanpipe_make_new
- *
- * Allocate memory for the a new sock, and sock
- * private data.
- *
- * Increment the module use count.
- *
- * This function is used by wanpipe_create() and
- * wanpipe_make_new() functions.
- *
- *===========================================================*/
-
-static struct sock *wanpipe_alloc_socket(void)
-{
- struct sock *sk;
- struct wanpipe_opt *wan_opt;
-
- if ((sk = sk_alloc(PF_WANPIPE, GFP_ATOMIC, &wanpipe_proto, 1)) == NULL)
- return NULL;
-
- if ((wan_opt = kzalloc(sizeof(struct wanpipe_opt), GFP_ATOMIC)) == NULL) {
- sk_free(sk);
- return NULL;
- }
-
- wp_sk(sk) = wan_opt;
-
- /* Use timer to send data to the driver. This will act
- * as a BH handler for sendmsg functions */
- init_timer(&wan_opt->tx_timer);
- wan_opt->tx_timer.data = (unsigned long)sk;
- wan_opt->tx_timer.function = wanpipe_delayed_transmit;
-
- sock_init_data(NULL, sk);
- return sk;
-}
-
-
-/*============================================================
- * wanpipe_sendmsg
- *
- * This function implements a sendto() system call,
- * for AF_WANPIPE socket family.
- * During socket bind() sk->sk_bound_dev_if is initialized
- * to a correct network device. This number is used
- * to find a network device to which the packet should
- * be passed to.
- *
- * Each packet is queued into sk->sk_write_queue and
- * delayed transmit bottom half handler is marked for
- * execution.
- *
- * A socket must be in WANSOCK_CONNECTED state before
- * a packet is queued into sk->sk_write_queue.
- *===========================================================*/
-
-static int wanpipe_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, int len)
-{
- wanpipe_opt *wp;
- struct sock *sk = sock->sk;
- struct wan_sockaddr_ll *saddr=(struct wan_sockaddr_ll *)msg->msg_name;
- struct sk_buff *skb;
- struct net_device *dev;
- unsigned short proto;
- unsigned char *addr;
- int ifindex, err, reserve = 0;
-
-
- if (!sock_flag(sk, SOCK_ZAPPED))
- return -ENETDOWN;
-
- if (sk->sk_state != WANSOCK_CONNECTED)
- return -ENOTCONN;
-
- if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
- return(-EINVAL);
-
- /* it was <=, now one can send
- * zero length packets */
- if (len < sizeof(x25api_hdr_t))
- return -EINVAL;
-
- wp = wp_sk(sk);
-
- if (saddr == NULL) {
- ifindex = sk->sk_bound_dev_if;
- proto = wp->num;
- addr = NULL;
-
- }else{
- if (msg->msg_namelen < sizeof(struct wan_sockaddr_ll)){
- return -EINVAL;
- }
-
- ifindex = sk->sk_bound_dev_if;
- proto = saddr->sll_protocol;
- addr = saddr->sll_addr;
- }
-
- dev = dev_get_by_index(ifindex);
- if (dev == NULL){
- printk(KERN_INFO "wansock: Send failed, dev index: %i\n",ifindex);
- return -ENXIO;
- }
- dev_put(dev);
-
- if (sock->type == SOCK_RAW)
- reserve = dev->hard_header_len;
-
- if (len > dev->mtu+reserve){
- return -EMSGSIZE;
- }
-
- skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
- msg->msg_flags & MSG_DONTWAIT, &err);
-
- if (skb==NULL){
- goto out_unlock;
- }
-
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
-
- /* Returns -EFAULT on error */
- err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
- if (err){
- goto out_free;
- }
-
- if (dev->hard_header) {
- int res;
- err = -EINVAL;
- res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
- if (res<0){
- goto out_free;
- }
- }
-
- skb->protocol = proto;
- skb->dev = dev;
- skb->priority = sk->sk_priority;
- skb->pkt_type = WAN_PACKET_DATA;
-
- err = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
- goto out_free;
-
- if (atomic_read(&sk->sk_wmem_alloc) + skb->truesize >
- (unsigned int)sk->sk_sndbuf){
- kfree_skb(skb);
- return -ENOBUFS;
- }
-
- skb_queue_tail(&sk->sk_write_queue,skb);
- atomic_inc(&wp->packet_sent);
-
- if (!(test_and_set_bit(0, &wp->timer)))
- mod_timer(&wp->tx_timer, jiffies + 1);
-
- return(len);
-
-out_free:
- kfree_skb(skb);
-out_unlock:
- return err;
-}
-
-/*============================================================
- * wanpipe_delayed_tarnsmit
- *
- * Transmit bottom half handler. It dequeues packets
- * from sk->sk_write_queue and passes them to the
- * driver. If the driver is busy, the packet is
- * re-enqueued.
- *
- * Packet Sent counter is decremented on successful
- * transmission.
- *===========================================================*/
-
-
-static void wanpipe_delayed_transmit (unsigned long data)
-{
- struct sock *sk=(struct sock *)data;
- struct sk_buff *skb;
- wanpipe_opt *wp = wp_sk(sk);
- struct net_device *dev = wp->dev;
- sdla_t *card = (sdla_t*)wp->card;
-
- if (!card || !dev){
- clear_bit(0, &wp->timer);
- DBG_PRINTK(KERN_INFO "wansock: Transmit delay, no dev or card\n");
- return;
- }
-
- if (sk->sk_state != WANSOCK_CONNECTED || !sock_flag(sk, SOCK_ZAPPED)) {
- clear_bit(0, &wp->timer);
- DBG_PRINTK(KERN_INFO "wansock: Tx Timer, State not CONNECTED\n");
- return;
- }
-
- /* If driver is executing command, we must offload
- * the board by not sending data. Otherwise a
- * pending command will never get a free buffer
- * to execute */
- if (atomic_read(&card->u.x.command_busy)){
- wp->tx_timer.expires = jiffies + SLOW_BACKOFF;
- add_timer(&wp->tx_timer);
- DBG_PRINTK(KERN_INFO "wansock: Tx Timer, command bys BACKOFF\n");
- return;
- }
-
-
- if (test_and_set_bit(0,&wanpipe_tx_critical)){
- printk(KERN_INFO "WanSock: Tx timer critical %s\n",dev->name);
- wp->tx_timer.expires = jiffies + SLOW_BACKOFF;
- add_timer(&wp->tx_timer);
- return;
- }
-
- /* Check for a packet in the fifo and send */
- if ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL){
-
- if (dev->hard_start_xmit(skb, dev) != 0){
-
- /* Driver failed to transmit, re-enqueue
- * the packet and retry again later */
- skb_queue_head(&sk->sk_write_queue,skb);
- clear_bit(0,&wanpipe_tx_critical);
- return;
- }else{
-
- /* Packet Sent successful. Check for more packets
- * if more packets, re-trigger the transmit routine
- * other wise exit
- */
- atomic_dec(&wp->packet_sent);
-
- if (skb_peek(&sk->sk_write_queue) == NULL) {
- /* If there is nothing to send, kick
- * the poll routine, which will trigger
- * the application to send more data */
- sk->sk_data_ready(sk, 0);
- clear_bit(0, &wp->timer);
- }else{
- /* Reschedule as fast as possible */
- wp->tx_timer.expires = jiffies + 1;
- add_timer(&wp->tx_timer);
- }
- }
- }
- clear_bit(0,&wanpipe_tx_critical);
-}
-
-/*============================================================
- * execute_command
- *
- * Execute x25api commands. The atomic variable
- * chan->command is used to indicate to the driver that
- * command is pending for execution. The acutal command
- * structure is placed into a sock mbox structure
- * (wp_sk(sk)->mbox).
- *
- * The sock private structure, mbox is
- * used as shared memory between sock and the driver.
- * Driver uses the sock mbox to execute the command
- * and return the result.
- *
- * For all command except PLACE CALL, the function
- * waits for the result. PLACE CALL can be ether
- * blocking or nonblocking. The user sets this option
- * via ioctl call.
- *===========================================================*/
-
-
-static int execute_command(struct sock *sk, unsigned char cmd, unsigned int flags)
-{
- wanpipe_opt *wp = wp_sk(sk);
- struct net_device *dev;
- wanpipe_common_t *chan=NULL;
- int err=0;
- DECLARE_WAITQUEUE(wait, current);
-
- dev = dev_get_by_index(sk->sk_bound_dev_if);
- if (dev == NULL){
- printk(KERN_INFO "wansock: Exec failed no dev %i\n",
- sk->sk_bound_dev_if);
- return -ENODEV;
- }
- dev_put(dev);
-
- if ((chan=dev->priv) == NULL){
- printk(KERN_INFO "wansock: Exec cmd failed no priv area\n");
- return -ENODEV;
- }
-
- if (atomic_read(&chan->command)){
- printk(KERN_INFO "wansock: ERROR: Command already running %x, %s\n",
- atomic_read(&chan->command),dev->name);
- return -EINVAL;
- }
-
- if (!wp->mbox) {
- printk(KERN_INFO "wansock: In execute without MBOX\n");
- return -EINVAL;
- }
-
- ((mbox_cmd_t*)wp->mbox)->cmd.command = cmd;
- ((mbox_cmd_t*)wp->mbox)->cmd.lcn = wp->lcn;
- ((mbox_cmd_t*)wp->mbox)->cmd.result = 0x7F;
-
-
- if (flags & O_NONBLOCK){
- cmd |= 0x80;
- atomic_set(&chan->command, cmd);
- }else{
- atomic_set(&chan->command, cmd);
- }
-
- add_wait_queue(sk->sk_sleep,&wait);
- current->state = TASK_INTERRUPTIBLE;
- for (;;){
- if (((mbox_cmd_t*)wp->mbox)->cmd.result != 0x7F) {
- err = 0;
- break;
- }
- if (signal_pending(current)) {
- err = -ERESTARTSYS;
- break;
- }
- schedule();
- }
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep,&wait);
-
- return err;
-}
-
-/*============================================================
- * wanpipe_destroy_timer
- *
- * Used by wanpipe_release, to delay release of
- * the socket.
- *===========================================================*/
-
-static void wanpipe_destroy_timer(unsigned long data)
-{
- struct sock *sk=(struct sock *)data;
- wanpipe_opt *wp = wp_sk(sk);
-
- if ((!atomic_read(&sk->sk_wmem_alloc) &&
- !atomic_read(&sk->sk_rmem_alloc)) ||
- (++wp->force == 5)) {
-
- if (atomic_read(&sk->sk_wmem_alloc) ||
- atomic_read(&sk->sk_rmem_alloc))
- printk(KERN_INFO "wansock: Warning, Packet Discarded due to sock shutdown!\n");
-
- kfree(wp);
- wp_sk(sk) = NULL;
-
- if (atomic_read(&sk->sk_refcnt) != 1) {
- atomic_set(&sk->sk_refcnt, 1);
- DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i ! :delay.\n",
- atomic_read(&sk->sk_refcnt));
- }
- sock_put(sk);
- atomic_dec(&wanpipe_socks_nr);
- return;
- }
-
- sk->sk_timer.expires = jiffies + 5 * HZ;
- add_timer(&sk->sk_timer);
- printk(KERN_INFO "wansock: packet sk destroy delayed\n");
-}
-
-/*============================================================
- * wanpipe_unlink_driver
- *
- * When the socket is released, this function is
- * used to remove links that bind the sock and the
- * driver together.
- *===========================================================*/
-static void wanpipe_unlink_driver (struct sock *sk)
-{
- struct net_device *dev;
- wanpipe_common_t *chan=NULL;
-
- sock_reset_flag(sk, SOCK_ZAPPED);
- sk->sk_state = WANSOCK_DISCONNECTED;
- wp_sk(sk)->dev = NULL;
-
- dev = dev_get_by_index(sk->sk_bound_dev_if);
- if (!dev){
- printk(KERN_INFO "wansock: No dev on release\n");
- return;
- }
- dev_put(dev);
-
- if ((chan = dev->priv) == NULL){
- printk(KERN_INFO "wansock: No Priv Area on release\n");
- return;
- }
-
- set_bit(0,&chan->common_critical);
- chan->sk=NULL;
- chan->func=NULL;
- chan->mbox=NULL;
- chan->tx_timer=NULL;
- clear_bit(0,&chan->common_critical);
- release_device(dev);
-
- return;
-}
-
-/*============================================================
- * wanpipe_link_driver
- *
- * Upon successful bind(), sock is linked to a driver
- * by binding in the wanpipe_rcv() bottom half handler
- * to the driver function pointer, as well as sock and
- * sock mailbox addresses. This way driver can pass
- * data up the socket.
- *===========================================================*/
-
-static void wanpipe_link_driver(struct net_device *dev, struct sock *sk)
-{
- wanpipe_opt *wp = wp_sk(sk);
- wanpipe_common_t *chan = dev->priv;
- if (!chan)
- return;
- set_bit(0,&chan->common_critical);
- chan->sk=sk;
- chan->func=wanpipe_rcv;
- chan->mbox = wp->mbox;
- chan->tx_timer = &wp->tx_timer;
- wp->dev = dev;
- sock_set_flag(sk, SOCK_ZAPPED);
- clear_bit(0,&chan->common_critical);
-}
-
-
-/*============================================================
- * release_device
- *
- * During sock release, clear a critical bit, which
- * marks the device a being taken.
- *===========================================================*/
-
-
-static void release_device(struct net_device *dev)
-{
- wanpipe_common_t *chan=dev->priv;
- clear_bit(0,(void*)&chan->rw_bind);
-}
-
-/*============================================================
- * wanpipe_release
- *
- * Close a PACKET socket. This is fairly simple. We
- * immediately go to 'closed' state and remove our
- * protocol entry in the device list.
- *===========================================================*/
-
-static int wanpipe_release(struct socket *sock)
-{
- wanpipe_opt *wp;
- struct sock *sk = sock->sk;
-
- if (!sk)
- return 0;
-
- wp = wp_sk(sk);
- check_write_queue(sk);
-
- /* Kill the tx timer, if we don't kill it now, the timer
- * will run after we kill the sock. Timer code will
- * try to access the sock which has been killed and cause
- * kernel panic */
-
- del_timer(&wp->tx_timer);
-
- /*
- * Unhook packet receive handler.
- */
-
- if (wp->num == htons(X25_PROT) &&
- sk->sk_state != WANSOCK_DISCONNECTED && sock_flag(sk, SOCK_ZAPPED)) {
- struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if);
- wanpipe_common_t *chan;
- if (dev){
- chan=dev->priv;
- atomic_set(&chan->disconnect,1);
- DBG_PRINTK(KERN_INFO "wansock: Sending Clear Indication %i\n",
- sk->sk_state);
- dev_put(dev);
- }
- }
-
- set_bit(1,&wanpipe_tx_critical);
- write_lock(&wanpipe_sklist_lock);
- sk_del_node_init(sk);
- write_unlock(&wanpipe_sklist_lock);
- clear_bit(1,&wanpipe_tx_critical);
-
-
-
- release_driver(sk);
-
-
- /*
- * Now the socket is dead. No more input will appear.
- */
-
- sk->sk_state_change(sk); /* It is useless. Just for sanity. */
-
- sock->sk = NULL;
- sk->sk_socket = NULL;
- sock_set_flag(sk, SOCK_DEAD);
-
- /* Purge queues */
- skb_queue_purge(&sk->sk_receive_queue);
- skb_queue_purge(&sk->sk_write_queue);
- skb_queue_purge(&sk->sk_error_queue);
-
- if (atomic_read(&sk->sk_rmem_alloc) ||
- atomic_read(&sk->sk_wmem_alloc)) {
- del_timer(&sk->sk_timer);
- printk(KERN_INFO "wansock: Killing in Timer R %i , W %i\n",
- atomic_read(&sk->sk_rmem_alloc),
- atomic_read(&sk->sk_wmem_alloc));
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.expires = jiffies + HZ;
- sk->sk_timer.function = wanpipe_destroy_timer;
- add_timer(&sk->sk_timer);
- return 0;
- }
-
- kfree(wp);
- wp_sk(sk) = NULL;
-
- if (atomic_read(&sk->sk_refcnt) != 1) {
- DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i !:release.\n",
- atomic_read(&sk->sk_refcnt));
- atomic_set(&sk->sk_refcnt, 1);
- }
- sock_put(sk);
- atomic_dec(&wanpipe_socks_nr);
- return 0;
-}
-
-/*============================================================
- * check_write_queue
- *
- * During sock shutdown, if the sock state is
- * WANSOCK_CONNECTED and there is transmit data
- * pending. Wait until data is released
- * before proceeding.
- *===========================================================*/
-
-static void check_write_queue(struct sock *sk)
-{
-
- if (sk->sk_state != WANSOCK_CONNECTED)
- return;
-
- if (!atomic_read(&sk->sk_wmem_alloc))
- return;
-
- printk(KERN_INFO "wansock: MAJOR ERROR, Data lost on sock release !!!\n");
-
-}
-
-/*============================================================
- * release_driver
- *
- * This function is called during sock shutdown, to
- * release any resources and links that bind the sock
- * to the driver. It also changes the state of the
- * sock to WANSOCK_DISCONNECTED
- *===========================================================*/
-
-static void release_driver(struct sock *sk)
-{
- wanpipe_opt *wp;
- struct sk_buff *skb=NULL;
- struct sock *deadsk=NULL;
-
- if (sk->sk_state == WANSOCK_LISTEN ||
- sk->sk_state == WANSOCK_BIND_LISTEN) {
- while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- if ((deadsk = get_newsk_from_skb(skb))){
- DBG_PRINTK (KERN_INFO "wansock: RELEASE: FOUND DEAD SOCK\n");
- sock_set_flag(deadsk, SOCK_DEAD);
- start_cleanup_timer(deadsk);
- }
- kfree_skb(skb);
- }
- if (sock_flag(sk, SOCK_ZAPPED))
- wanpipe_unlink_card(sk);
- }else{
- if (sock_flag(sk, SOCK_ZAPPED))
- wanpipe_unlink_driver(sk);
- }
- sk->sk_state = WANSOCK_DISCONNECTED;
- sk->sk_bound_dev_if = 0;
- sock_reset_flag(sk, SOCK_ZAPPED);
- wp = wp_sk(sk);
-
- if (wp) {
- kfree(wp->mbox);
- wp->mbox = NULL;
- }
-}
-
-/*============================================================
- * start_cleanup_timer
- *
- * If new incoming call's are pending but the socket
- * is being released, start the timer which will
- * envoke the kill routines for pending socks.
- *===========================================================*/
-
-
-static void start_cleanup_timer (struct sock *sk)
-{
- del_timer(&sk->sk_timer);
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.expires = jiffies + HZ;
- sk->sk_timer.function = wanpipe_kill_sock_timer;
- add_timer(&sk->sk_timer);
-}
-
-
-/*============================================================
- * wanpipe_kill_sock
- *
- * This is a function which performs actual killing
- * of the sock. It releases socket resources,
- * and unlinks the sock from the driver.
- *===========================================================*/
-
-static void wanpipe_kill_sock_timer (unsigned long data)
-{
-
- struct sock *sk = (struct sock *)data;
- struct sock **skp;
-
- if (!sk)
- return;
-
- /* This function can be called from interrupt. We must use
- * appropriate locks */
-
- if (test_bit(1,&wanpipe_tx_critical)){
- sk->sk_timer.expires = jiffies + 10;
- add_timer(&sk->sk_timer);
- return;
- }
-
- write_lock(&wanpipe_sklist_lock);
- sk_del_node_init(sk);
- write_unlock(&wanpipe_sklist_lock);
-
-
- if (wp_sk(sk)->num == htons(X25_PROT) &&
- sk->sk_state != WANSOCK_DISCONNECTED) {
- struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if);
- wanpipe_common_t *chan;
- if (dev){
- chan=dev->priv;
- atomic_set(&chan->disconnect,1);
- dev_put(dev);
- }
- }
-
- release_driver(sk);
-
- sk->sk_socket = NULL;
-
- /* Purge queues */
- skb_queue_purge(&sk->sk_receive_queue);
- skb_queue_purge(&sk->sk_write_queue);
- skb_queue_purge(&sk->sk_error_queue);
-
- if (atomic_read(&sk->sk_rmem_alloc) ||
- atomic_read(&sk->sk_wmem_alloc)) {
- del_timer(&sk->sk_timer);
- printk(KERN_INFO "wansock: Killing SOCK in Timer\n");
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.expires = jiffies + HZ;
- sk->sk_timer.function = wanpipe_destroy_timer;
- add_timer(&sk->sk_timer);
- return;
- }
-
- kfree(wp_sk(sk));
- wp_sk(sk) = NULL;
-
- if (atomic_read(&sk->sk_refcnt) != 1) {
- atomic_set(&sk->sk_refcnt, 1);
- DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i ! :timer.\n",
- atomic_read(&sk->sk_refcnt));
- }
- sock_put(sk);
- atomic_dec(&wanpipe_socks_nr);
- return;
-}
-
-static void wanpipe_kill_sock_accept (struct sock *sk)
-{
-
- struct sock **skp;
-
- if (!sk)
- return;
-
- /* This function can be called from interrupt. We must use
- * appropriate locks */
-
- write_lock(&wanpipe_sklist_lock);
- sk_del_node_init(sk);
- write_unlock(&wanpipe_sklist_lock);
-
- sk->sk_socket = NULL;
-
-
- kfree(wp_sk(sk));
- wp_sk(sk) = NULL;
-
- if (atomic_read(&sk->sk_refcnt) != 1) {
- atomic_set(&sk->sk_refcnt, 1);
- DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i ! :timer.\n",
- atomic_read(&sk->sk_refcnt));
- }
- sock_put(sk);
- atomic_dec(&wanpipe_socks_nr);
- return;
-}
-
-
-static void wanpipe_kill_sock_irq (struct sock *sk)
-{
-
- if (!sk)
- return;
-
- sk->sk_socket = NULL;
-
- kfree(wp_sk(sk));
- wp_sk(sk) = NULL;
-
- if (atomic_read(&sk->sk_refcnt) != 1) {
- atomic_set(&sk->sk_refcnt, 1);
- DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i !:listen.\n",
- atomic_read(&sk->sk_refcnt));
- }
- sock_put(sk);
- atomic_dec(&wanpipe_socks_nr);
-}
-
-
-/*============================================================
- * wanpipe_do_bind
- *
- * Bottom half of the binding system call.
- * Once the wanpipe_bind() function checks the
- * legality of the call, this function binds the
- * sock to the driver.
- *===========================================================*/
-
-static int wanpipe_do_bind(struct sock *sk, struct net_device *dev,
- int protocol)
-{
- wanpipe_opt *wp = wp_sk(sk);
- wanpipe_common_t *chan=NULL;
- int err=0;
-
- if (sock_flag(sk, SOCK_ZAPPED)) {
- err = -EALREADY;
- goto bind_unlock_exit;
- }
-
- wp->num = protocol;
-
- if (protocol == 0){
- release_device(dev);
- err = -EINVAL;
- goto bind_unlock_exit;
- }
-
- if (dev) {
- if (dev->flags&IFF_UP) {
- chan=dev->priv;
- sk->sk_state = chan->state;
-
- if (wp->num == htons(X25_PROT) &&
- sk->sk_state != WANSOCK_DISCONNECTED &&
- sk->sk_state != WANSOCK_CONNECTING) {
- DBG_PRINTK(KERN_INFO
- "wansock: Binding to Device not DISCONNECTED %i\n",
- sk->sk_state);
- release_device(dev);
- err = -EAGAIN;
- goto bind_unlock_exit;
- }
-
- wanpipe_link_driver(dev,sk);
- sk->sk_bound_dev_if = dev->ifindex;
-
- /* X25 Specific option */
- if (wp->num == htons(X25_PROT))
- wp_sk(sk)->svc = chan->svc;
-
- } else {
- sk->sk_err = ENETDOWN;
- sk->sk_error_report(sk);
- release_device(dev);
- err = -EINVAL;
- }
- } else {
- err = -ENODEV;
- }
-bind_unlock_exit:
- /* FIXME where is this lock */
-
- return err;
-}
-
-/*============================================================
- * wanpipe_bind
- *
- * BIND() System call, which is bound to the AF_WANPIPE
- * operations structure. It checks for correct wanpipe
- * card name, and cross references interface names with
- * the card names. Thus, interface name must belong to
- * the actual card.
- *===========================================================*/
-
-
-static int wanpipe_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
-{
- struct wan_sockaddr_ll *sll = (struct wan_sockaddr_ll*)uaddr;
- struct sock *sk=sock->sk;
- wanpipe_opt *wp = wp_sk(sk);
- struct net_device *dev = NULL;
- sdla_t *card=NULL;
- char name[15];
-
- /*
- * Check legality
- */
-
- if (addr_len < sizeof(struct wan_sockaddr_ll)){
- printk(KERN_INFO "wansock: Address length error\n");
- return -EINVAL;
- }
- if (sll->sll_family != AF_WANPIPE){
- printk(KERN_INFO "wansock: Illegal family name specified.\n");
- return -EINVAL;
- }
-
- card = wanpipe_find_card (sll->sll_card);
- if (!card){
- printk(KERN_INFO "wansock: Wanpipe card not found: %s\n",sll->sll_card);
- return -ENODEV;
- }else{
- wp_sk(sk)->card = (void *)card;
- }
-
- if (!strcmp(sll->sll_device,"svc_listen")){
-
- /* Bind a sock to a card structure for listening
- */
- int err=0;
-
- /* This is x25 specific area if protocol doesn't
- * match, return error */
- if (sll->sll_protocol != htons(X25_PROT))
- return -EINVAL;
-
- err= wanpipe_link_card (sk);
- if (err < 0)
- return err;
-
- if (sll->sll_protocol)
- wp->num = sll->sll_protocol;
- sk->sk_state = WANSOCK_BIND_LISTEN;
- return 0;
-
- }else if (!strcmp(sll->sll_device,"svc_connect")){
-
- /* This is x25 specific area if protocol doesn't
- * match, return error */
- if (sll->sll_protocol != htons(X25_PROT))
- return -EINVAL;
-
- /* Find a free device
- */
- dev = wanpipe_find_free_dev(card);
- if (dev == NULL){
- DBG_PRINTK(KERN_INFO "wansock: No free network devices for card %s\n",
- card->devname);
- return -EINVAL;
- }
- }else{
- /* Bind a socket to a interface name
- * This is used by PVC mostly
- */
- strlcpy(name,sll->sll_device,sizeof(name));
- dev = dev_get_by_name(name);
- if (dev == NULL){
- printk(KERN_INFO "wansock: Failed to get Dev from name: %s,\n",
- name);
- return -ENODEV;
- }
-
- dev_put(dev);
-
- if (check_dev(dev, card)){
- printk(KERN_INFO "wansock: Device %s, doesn't belong to card %s\n",
- dev->name, card->devname);
- return -EINVAL;
- }
- if (get_atomic_device (dev))
- return -EINVAL;
- }
-
- return wanpipe_do_bind(sk, dev, sll->sll_protocol ? : wp->num);
-}
-
-/*============================================================
- * get_atomic_device
- *
- * Sets a bit atomically which indicates that
- * the interface is taken. This avoids race conditions.
- *===========================================================*/
-
-
-static inline int get_atomic_device(struct net_device *dev)
-{
- wanpipe_common_t *chan = dev->priv;
- if (!test_and_set_bit(0,(void *)&chan->rw_bind)){
- return 0;
- }
- return 1;
-}
-
-/*============================================================
- * check_dev
- *
- * Check that device name belongs to a particular card.
- *===========================================================*/
-
-static int check_dev(struct net_device *dev, sdla_t *card)
-{
- struct net_device* tmp_dev;
-
- for (tmp_dev = card->wandev.dev; tmp_dev;
- tmp_dev = *((struct net_device **)tmp_dev->priv)) {
- if (tmp_dev->ifindex == dev->ifindex){
- return 0;
- }
- }
- return 1;
-}
-
-/*============================================================
- * wanpipe_find_free_dev
- *
- * Find a free network interface. If found set atomic
- * bit indicating that the interface is taken.
- * X25API Specific.
- *===========================================================*/
-
-struct net_device *wanpipe_find_free_dev(sdla_t *card)
-{
- struct net_device* dev;
- volatile wanpipe_common_t *chan;
-
- if (test_and_set_bit(0,&find_free_critical)){
- printk(KERN_INFO "CRITICAL in Find Free\n");
- }
-
- for (dev = card->wandev.dev; dev;
- dev = *((struct net_device **)dev->priv)) {
- chan = dev->priv;
- if (!chan)
- continue;
- if (chan->usedby == API && chan->svc){
- if (!get_atomic_device (dev)){
- if (chan->state != WANSOCK_DISCONNECTED){
- release_device(dev);
- }else{
- clear_bit(0,&find_free_critical);
- return dev;
- }
- }
- }
- }
- clear_bit(0,&find_free_critical);
- return NULL;
-}
-
-/*============================================================
- * wanpipe_create
- *
- * SOCKET() System call. It allocates a sock structure
- * and adds the socket to the wanpipe_sk_list.
- * Crates AF_WANPIPE socket.
- *===========================================================*/
-
-static int wanpipe_create(struct socket *sock, int protocol)
-{
- struct sock *sk;
-
- //FIXME: This checks for root user, SECURITY ?
- //if (!capable(CAP_NET_RAW))
- // return -EPERM;
-
- if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
- return -ESOCKTNOSUPPORT;
-
- sock->state = SS_UNCONNECTED;
-
- if ((sk = wanpipe_alloc_socket()) == NULL)
- return -ENOBUFS;
-
- sk->sk_reuse = 1;
- sock->ops = &wanpipe_ops;
- sock_init_data(sock,sk);
-
- sock_reset_flag(sk, SOCK_ZAPPED);
- sk->sk_family = PF_WANPIPE;
- wp_sk(sk)->num = protocol;
- sk->sk_state = WANSOCK_DISCONNECTED;
- sk->sk_ack_backlog = 0;
- sk->sk_bound_dev_if = 0;
-
- atomic_inc(&wanpipe_socks_nr);
-
- /* We must disable interrupts because the ISR
- * can also change the list */
- set_bit(1,&wanpipe_tx_critical);
- write_lock(&wanpipe_sklist_lock);
- sk_add_node(sk, &wanpipe_sklist);
- write_unlock(&wanpipe_sklist_lock);
- clear_bit(1,&wanpipe_tx_critical);
-
- return(0);
-}
-
-
-/*============================================================
- * wanpipe_recvmsg
- *
- * Pull a packet from our receive queue and hand it
- * to the user. If necessary we block.
- *===========================================================*/
-
-static int wanpipe_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, int len, int flags)
-{
- struct sock *sk = sock->sk;
- struct sk_buff *skb;
- int copied, err=-ENOBUFS;
-
-
- /*
- * If the address length field is there to be filled in, we fill
- * it in now.
- */
-
- msg->msg_namelen = sizeof(struct wan_sockaddr_ll);
-
- /*
- * Call the generic datagram receiver. This handles all sorts
- * of horrible races and re-entrancy so we can forget about it
- * in the protocol layers.
- *
- * Now it will return ENETDOWN, if device have just gone down,
- * but then it will block.
- */
-
- if (flags & MSG_OOB){
- skb = skb_dequeue(&sk->sk_error_queue);
- }else{
- skb=skb_recv_datagram(sk,flags,1,&err);
- }
- /*
- * An error occurred so return it. Because skb_recv_datagram()
- * handles the blocking we don't see and worry about blocking
- * retries.
- */
-
- if(skb==NULL)
- goto out;
-
- /*
- * You lose any data beyond the buffer you gave. If it worries a
- * user program they can ask the device for its MTU anyway.
- */
-
- copied = skb->len;
- if (copied > len)
- {
- copied=len;
- msg->msg_flags|=MSG_TRUNC;
- }
-
- wanpipe_wakeup_driver(sk);
-
- /* We can't use skb_copy_datagram here */
- err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
- if (err)
- goto out_free;
-
- sock_recv_timestamp(msg, sk, skb);
-
- if (msg->msg_name)
- memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
-
- /*
- * Free or return the buffer as appropriate. Again this
- * hides all the races and re-entrancy issues from us.
- */
- err = (flags&MSG_TRUNC) ? skb->len : copied;
-
-out_free:
- skb_free_datagram(sk, skb);
-out:
- return err;
-}
-
-
-/*============================================================
- * wanpipe_wakeup_driver
- *
- * If socket receive buffer is full and driver cannot
- * pass data up the sock, it sets a packet_block flag.
- * This function check that flag and if sock receive
- * queue has room it kicks the driver BH handler.
- *
- * This way, driver doesn't have to poll the sock
- * receive queue.
- *===========================================================*/
-
-static void wanpipe_wakeup_driver(struct sock *sk)
-{
- struct net_device *dev = NULL;
- wanpipe_common_t *chan=NULL;
-
- dev = dev_get_by_index(sk->sk_bound_dev_if);
- if (!dev)
- return;
-
- dev_put(dev);
-
- if ((chan = dev->priv) == NULL)
- return;
-
- if (atomic_read(&chan->receive_block)){
- if (atomic_read(&sk->sk_rmem_alloc) <
- ((unsigned)sk->sk_rcvbuf * 0.9)) {
- printk(KERN_INFO "wansock: Queuing task for wanpipe\n");
- atomic_set(&chan->receive_block,0);
- wanpipe_queue_tq(&chan->wanpipe_task);
- wanpipe_mark_bh();
- }
- }
-}
-
-/*============================================================
- * wanpipe_getname
- *
- * I don't know what to do with this yet.
- * User can use this function to get sock address
- * information. Not very useful for Sangoma's purposes.
- *===========================================================*/
-
-
-static int wanpipe_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
-{
- struct net_device *dev;
- struct sock *sk = sock->sk;
- struct wan_sockaddr_ll *sll = (struct wan_sockaddr_ll*)uaddr;
-
- sll->sll_family = AF_WANPIPE;
- sll->sll_ifindex = sk->sk_bound_dev_if;
- sll->sll_protocol = wp_sk(sk)->num;
- dev = dev_get_by_index(sk->sk_bound_dev_if);
- if (dev) {
- sll->sll_hatype = dev->type;
- sll->sll_halen = dev->addr_len;
- memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
- } else {
- sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
- sll->sll_halen = 0;
- }
- *uaddr_len = sizeof(*sll);
-
- dev_put(dev);
-
- return 0;
-}
-
-/*============================================================
- * wanpipe_notifier
- *
- * If driver turns off network interface, this function
- * will be envoked. Currently I treate it as a
- * call disconnect. More thought should go into this
- * function.
- *
- * FIXME: More thought should go into this function.
- *
- *===========================================================*/
-
-static int wanpipe_notifier(struct notifier_block *this, unsigned long msg, void *data)
-{
- struct sock *sk;
- hlist_node *node;
- struct net_device *dev = (struct net_device *)data;
-
- sk_for_each(sk, node, &wanpipe_sklist) {
- struct wanpipe_opt *po = wp_sk(sk);
-
- if (!po)
- continue;
- if (dev == NULL)
- continue;
-
- switch (msg) {
- case NETDEV_DOWN:
- case NETDEV_UNREGISTER:
- if (dev->ifindex == sk->sk_bound_dev_if) {
- printk(KERN_INFO "wansock: Device down %s\n",dev->name);
- if (sock_flag(sk, SOCK_ZAPPED)) {
- wanpipe_unlink_driver(sk);
- sk->sk_err = ENETDOWN;
- sk->sk_error_report(sk);
- }
-
- if (msg == NETDEV_UNREGISTER) {
- printk(KERN_INFO "wansock: Unregistering Device: %s\n",
- dev->name);
- wanpipe_unlink_driver(sk);
- sk->sk_bound_dev_if = 0;
- }
- }
- break;
- case NETDEV_UP:
- if (dev->ifindex == sk->sk_bound_dev_if &&
- po->num && !sock_flag(sk, SOCK_ZAPPED)) {
- printk(KERN_INFO "wansock: Registering Device: %s\n",
- dev->name);
- wanpipe_link_driver(dev,sk);
- }
- break;
- }
- }
- return NOTIFY_DONE;
-}
-
-/*============================================================
- * wanpipe_ioctl
- *
- * Execute a user commands, and set socket options.
- *
- * FIXME: More thought should go into this function.
- *
- *===========================================================*/
-
-static int wanpipe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
- struct sock *sk = sock->sk;
- int err;
-
- switch(cmd)
- {
- case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
-
- case SIOC_WANPIPE_CHECK_TX:
-
- return atomic_read(&sk->sk_wmem_alloc);
-
- case SIOC_WANPIPE_SOCK_STATE:
-
- if (sk->sk_state == WANSOCK_CONNECTED)
- return 0;
-
- return 1;
-
-
- case SIOC_WANPIPE_GET_CALL_DATA:
-
- return get_ioctl_cmd (sk,(void*)arg);
-
- case SIOC_WANPIPE_SET_CALL_DATA:
-
- return set_ioctl_cmd (sk,(void*)arg);
-
- case SIOC_WANPIPE_ACCEPT_CALL:
- case SIOC_WANPIPE_CLEAR_CALL:
- case SIOC_WANPIPE_RESET_CALL:
-
- if ((err=set_ioctl_cmd(sk,(void*)arg)) < 0)
- return err;
-
- err=wanpipe_exec_cmd(sk,cmd,0);
- get_ioctl_cmd(sk,(void*)arg);
- return err;
-
- case SIOC_WANPIPE_DEBUG:
-
- return wanpipe_debug(sk,(void*)arg);
-
- case SIOC_WANPIPE_SET_NONBLOCK:
-
- if (sk->sk_state != WANSOCK_DISCONNECTED)
- return -EINVAL;
-
- sock->file->f_flags |= O_NONBLOCK;
- return 0;
-
-#ifdef CONFIG_INET
- case SIOCADDRT:
- case SIOCDELRT:
- case SIOCDARP:
- case SIOCGARP:
- case SIOCSARP:
- case SIOCDRARP:
- case SIOCGRARP:
- case SIOCSRARP:
- case SIOCGIFADDR:
- case SIOCSIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
- case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
- case SIOCGIFDSTADDR:
- case SIOCSIFDSTADDR:
- case SIOCSIFFLAGS:
- return inet_dgram_ops.ioctl(sock, cmd, arg);
-#endif
-
- default:
- return -ENOIOCTLCMD;
- }
- /*NOTREACHED*/
-}
-
-/*============================================================
- * wanpipe_debug
- *
- * This function will pass up information about all
- * active sockets.
- *
- * FIXME: More thought should go into this function.
- *
- *===========================================================*/
-
-static int wanpipe_debug (struct sock *origsk, void *arg)
-{
- struct sock *sk;
- struct hlist_node *node;
- struct net_device *dev = NULL;
- wanpipe_common_t *chan=NULL;
- int cnt=0, err=0;
- wan_debug_t *dbg_data = (wan_debug_t *)arg;
-
- sk_for_each(sk, node, &wanpipe_sklist) {
- wanpipe_opt *wp = wp_sk(sk);
-
- if (sk == origsk){
- continue;
- }
-
- if ((err=put_user(1, &dbg_data->debug[cnt].free)))
- return err;
- if ((err = put_user(sk->sk_state,
- &dbg_data->debug[cnt].state_sk)))
- return err;
- if ((err = put_user(sk->sk_rcvbuf,
- &dbg_data->debug[cnt].rcvbuf)))
- return err;
- if ((err = put_user(atomic_read(&sk->sk_rmem_alloc),
- &dbg_data->debug[cnt].rmem)))
- return err;
- if ((err = put_user(atomic_read(&sk->sk_wmem_alloc),
- &dbg_data->debug[cnt].wmem)))
- return err;
- if ((err = put_user(sk->sk_sndbuf,
- &dbg_data->debug[cnt].sndbuf)))
- return err;
- if ((err=put_user(sk_count, &dbg_data->debug[cnt].sk_count)))
- return err;
- if ((err=put_user(wp->poll_cnt, &dbg_data->debug[cnt].poll_cnt)))
- return err;
- if ((err = put_user(sk->sk_bound_dev_if,
- &dbg_data->debug[cnt].bound)))
- return err;
-
- if (sk->sk_bound_dev_if) {
- dev = dev_get_by_index(sk->sk_bound_dev_if);
- if (!dev)
- continue;
-
- chan=dev->priv;
- dev_put(dev);
-
- if ((err=put_user(chan->state, &dbg_data->debug[cnt].d_state)))
- return err;
- if ((err=put_user(chan->svc, &dbg_data->debug[cnt].svc)))
- return err;
-
- if ((err=put_user(atomic_read(&chan->command),
- &dbg_data->debug[cnt].command)))
- return err;
-
-
- if (wp){
- sdla_t *card = (sdla_t*)wp->card;
-
- if (card){
- if ((err=put_user(atomic_read(&card->u.x.command_busy),
- &dbg_data->debug[cnt].cmd_busy)))
- return err;
- }
-
- if ((err=put_user(wp->lcn,
- &dbg_data->debug[cnt].lcn)))
- return err;
-
- if (wp->mbox) {
- if ((err=put_user(1, &dbg_data->debug[cnt].mbox)))
- return err;
- }
- }
-
- if ((err=put_user(atomic_read(&chan->receive_block),
- &dbg_data->debug[cnt].rblock)))
- return err;
-
- if (copy_to_user(dbg_data->debug[cnt].name, dev->name, strlen(dev->name)))
- return -EFAULT;
- }
-
- if (++cnt == MAX_NUM_DEBUG)
- break;
- }
- return 0;
-}
-
-/*============================================================
- * get_ioctl_cmd
- *
- * Pass up the contents of socket MBOX to the user.
- *===========================================================*/
-
-static int get_ioctl_cmd (struct sock *sk, void *arg)
-{
- x25api_t *usr_data = (x25api_t *)arg;
- mbox_cmd_t *mbox_ptr;
- int err;
-
- if (usr_data == NULL)
- return -EINVAL;
-
- if (!wp_sk(sk)->mbox) {
- return -EINVAL;
- }
-
- mbox_ptr = (mbox_cmd_t *)wp_sk(sk)->mbox;
-
- if ((err=put_user(mbox_ptr->cmd.qdm, &usr_data->hdr.qdm)))
- return err;
- if ((err=put_user(mbox_ptr->cmd.cause, &usr_data->hdr.cause)))
- return err;
- if ((err=put_user(mbox_ptr->cmd.diagn, &usr_data->hdr.diagn)))
- return err;
- if ((err=put_user(mbox_ptr->cmd.length, &usr_data->hdr.length)))
- return err;
- if ((err=put_user(mbox_ptr->cmd.result, &usr_data->hdr.result)))
- return err;
- if ((err=put_user(mbox_ptr->cmd.lcn, &usr_data->hdr.lcn)))
- return err;
-
- if (mbox_ptr->cmd.length > 0){
- if (mbox_ptr->cmd.length > X25_MAX_DATA)
- return -EINVAL;
-
- if (copy_to_user(usr_data->data, mbox_ptr->data, mbox_ptr->cmd.length)){
- printk(KERN_INFO "wansock: Copy failed !!!\n");
- return -EFAULT;
- }
- }
- return 0;
-}
-
-/*============================================================
- * set_ioctl_cmd
- *
- * Before command can be execute, socket MBOX must
- * be created, and initialized with user data.
- *===========================================================*/
-
-static int set_ioctl_cmd (struct sock *sk, void *arg)
-{
- x25api_t *usr_data = (x25api_t *)arg;
- mbox_cmd_t *mbox_ptr;
- int err;
-
- if (!wp_sk(sk)->mbox) {
- void *mbox_ptr;
- struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if);
- if (!dev)
- return -ENODEV;
-
- dev_put(dev);
-
- if ((mbox_ptr = kzalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL)
- return -ENOMEM;
-
- wp_sk(sk)->mbox = mbox_ptr;
-
- wanpipe_link_driver(dev,sk);
- }
-
- mbox_ptr = (mbox_cmd_t*)wp_sk(sk)->mbox;
- memset(mbox_ptr, 0, sizeof(mbox_cmd_t));
-
- if (usr_data == NULL){
- return 0;
- }
- if ((err=get_user(mbox_ptr->cmd.qdm, &usr_data->hdr.qdm)))
- return err;
- if ((err=get_user(mbox_ptr->cmd.cause, &usr_data->hdr.cause)))
- return err;
- if ((err=get_user(mbox_ptr->cmd.diagn, &usr_data->hdr.diagn)))
- return err;
- if ((err=get_user(mbox_ptr->cmd.length, &usr_data->hdr.length)))
- return err;
- if ((err=get_user(mbox_ptr->cmd.result, &usr_data->hdr.result)))
- return err;
-
- if (mbox_ptr->cmd.length > 0){
- if (mbox_ptr->cmd.length > X25_MAX_DATA)
- return -EINVAL;
-
- if (copy_from_user(mbox_ptr->data, usr_data->data, mbox_ptr->cmd.length)){
- printk(KERN_INFO "Copy failed\n");
- return -EFAULT;
- }
- }
- return 0;
-}
-
-
-/*======================================================================
- * wanpipe_poll
- *
- * Datagram poll: Again totally generic. This also handles
- * sequenced packet sockets providing the socket receive queue
- * is only ever holding data ready to receive.
- *
- * Note: when you _don't_ use this routine for this protocol,
- * and you use a different write policy from sock_writeable()
- * then please supply your own write_space callback.
- *=====================================================================*/
-
-unsigned int wanpipe_poll(struct file * file, struct socket *sock, poll_table *wait)
-{
- struct sock *sk = sock->sk;
- unsigned int mask;
-
- ++wp_sk(sk)->poll_cnt;
-
- poll_wait(file, sk->sk_sleep, wait);
- mask = 0;
-
- /* exceptional events? */
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) {
- mask |= POLLPRI;
- return mask;
- }
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLHUP;
-
- /* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue)) {
- mask |= POLLIN | POLLRDNORM;
- }
-
- /* connection hasn't started yet */
- if (sk->sk_state == WANSOCK_CONNECTING) {
- return mask;
- }
-
- if (sk->sk_state == WANSOCK_DISCONNECTED) {
- mask = POLLPRI;
- return mask;
- }
-
- /* This check blocks the user process if there is
- * a packet already queued in the socket write queue.
- * This option is only for X25API protocol, for other
- * protocol like chdlc enable streaming mode,
- * where multiple packets can be pending in the socket
- * transmit queue */
-
- if (wp_sk(sk)->num == htons(X25_PROT)) {
- if (atomic_read(&wp_sk(sk)->packet_sent))
- return mask;
- }
-
- /* writable? */
- if (sock_writeable(sk)){
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
- }else{
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- }
-
- return mask;
-}
-
-/*======================================================================
- * wanpipe_listen
- *
- * X25API Specific function. Set a socket into LISTENING MODE.
- *=====================================================================*/
-
-
-static int wanpipe_listen(struct socket *sock, int backlog)
-{
- struct sock *sk = sock->sk;
-
- /* This is x25 specific area if protocol doesn't
- * match, return error */
- if (wp_sk(sk)->num != htons(X25_PROT))
- return -EINVAL;
-
- if (sk->sk_state == WANSOCK_BIND_LISTEN) {
-
- sk->sk_max_ack_backlog = backlog;
- sk->sk_state = WANSOCK_LISTEN;
- return 0;
- }else{
- printk(KERN_INFO "wansock: Listening sock was not binded\n");
- }
-
- return -EINVAL;
-}
-
-/*======================================================================
- * wanpipe_link_card
- *
- * Connects the listening socket to the driver
- *=====================================================================*/
-
-static int wanpipe_link_card (struct sock *sk)
-{
- sdla_t *card = (sdla_t*)wp_sk(sk)->card;
-
- if (!card)
- return -ENOMEM;
-
- if ((card->sk != NULL) || (card->func != NULL)){
- printk(KERN_INFO "wansock: Listening queue is already established\n");
- return -EINVAL;
- }
-
- card->sk=sk;
- card->func=wanpipe_listen_rcv;
- sock_set_flag(sk, SOCK_ZAPPED);
-
- return 0;
-}
-
-/*======================================================================
- * wanpipe_listen
- *
- * X25API Specific function. Disconnect listening socket from
- * the driver.
- *=====================================================================*/
-
-static void wanpipe_unlink_card (struct sock *sk)
-{
- sdla_t *card = (sdla_t*)wp_sk(sk)->card;
-
- if (card){
- card->sk=NULL;
- card->func=NULL;
- }
-}
-
-/*======================================================================
- * wanpipe_exec_cmd
- *
- * Ioctl function calls this function to execute user command.
- * Connect() sytem call also calls this function to execute
- * place call. This function blocks until command is executed.
- *=====================================================================*/
-
-static int wanpipe_exec_cmd(struct sock *sk, int cmd, unsigned int flags)
-{
- int err = -EINVAL;
- wanpipe_opt *wp = wp_sk(sk);
- mbox_cmd_t *mbox_ptr = (mbox_cmd_t*)wp->mbox;
-
- if (!mbox_ptr){
- printk(KERN_INFO "NO MBOX PTR !!!!!\n");
- return -EINVAL;
- }
-
- /* This is x25 specific area if protocol doesn't
- * match, return error */
- if (wp->num != htons(X25_PROT))
- return -EINVAL;
-
-
- switch (cmd){
-
- case SIOC_WANPIPE_ACCEPT_CALL:
-
- if (sk->sk_state != WANSOCK_CONNECTING) {
- err = -EHOSTDOWN;
- break;
- }
-
- err = execute_command(sk,X25_ACCEPT_CALL,0);
- if (err < 0)
- break;
-
- /* Update. Mar6 2000.
- * Do not set the sock lcn number here, since
- * it is done in wanpipe_listen_rcv().
- */
- if (sk->sk_state == WANSOCK_CONNECTED) {
- wp->lcn = ((mbox_cmd_t*)wp->mbox)->cmd.lcn;
- DBG_PRINTK(KERN_INFO "\nwansock: Accept OK %i\n",
- wp->lcn);
- err = 0;
-
- }else{
- DBG_PRINTK (KERN_INFO "\nwansock: Accept Failed %i\n",
- wp->lcn);
- wp->lcn = 0;
- err = -ECONNREFUSED;
- }
- break;
-
- case SIOC_WANPIPE_CLEAR_CALL:
-
- if (sk->sk_state == WANSOCK_DISCONNECTED) {
- err = -EINVAL;
- break;
- }
-
-
- /* Check if data buffers are pending for transmission,
- * if so, check whether user wants to wait until data
- * is transmitted, or clear a call and drop packets */
-
- if (atomic_read(&sk->sk_wmem_alloc) ||
- check_driver_busy(sk)) {
- mbox_cmd_t *mbox = wp->mbox;
- if (mbox->cmd.qdm & 0x80){
- mbox->cmd.result = 0x35;
- err = -EAGAIN;
- break;
- }
- }
-
- sk->sk_state = WANSOCK_DISCONNECTING;
-
- err = execute_command(sk,X25_CLEAR_CALL,0);
- if (err < 0)
- break;
-
- err = -ECONNREFUSED;
- if (sk->sk_state == WANSOCK_DISCONNECTED) {
- DBG_PRINTK(KERN_INFO "\nwansock: CLEAR OK %i\n",
- wp->lcn);
- wp->lcn = 0;
- err = 0;
- }
- break;
-
- case SIOC_WANPIPE_RESET_CALL:
-
- if (sk->sk_state != WANSOCK_CONNECTED) {
- err = -EINVAL;
- break;
- }
-
-
- /* Check if data buffers are pending for transmission,
- * if so, check whether user wants to wait until data
- * is transmitted, or reset a call and drop packets */
-
- if (atomic_read(&sk->sk_wmem_alloc) ||
- check_driver_busy(sk)) {
- mbox_cmd_t *mbox = wp->mbox;
- if (mbox->cmd.qdm & 0x80){
- mbox->cmd.result = 0x35;
- err = -EAGAIN;
- break;
- }
- }
-
-
- err = execute_command(sk, X25_RESET,0);
- if (err < 0)
- break;
-
- err = mbox_ptr->cmd.result;
- break;
-
-
- case X25_PLACE_CALL:
-
- err=execute_command(sk,X25_PLACE_CALL,flags);
- if (err < 0)
- break;
-
- if (sk->sk_state == WANSOCK_CONNECTED) {
-
- wp->lcn = ((mbox_cmd_t*)wp->mbox)->cmd.lcn;
-
- DBG_PRINTK(KERN_INFO "\nwansock: PLACE CALL OK %i\n",
- wp->lcn);
- err = 0;
-
- } else if (sk->sk_state == WANSOCK_CONNECTING &&
- (flags & O_NONBLOCK)) {
- wp->lcn = ((mbox_cmd_t*)wp->mbox)->cmd.lcn;
- DBG_PRINTK(KERN_INFO "\nwansock: Place Call OK: Waiting %i\n",
- wp->lcn);
-
- err = 0;
-
- }else{
- DBG_PRINTK(KERN_INFO "\nwansock: Place call Failed\n");
- err = -ECONNREFUSED;
- }
-
- break;
-
- default:
- return -EINVAL;
- }
-
- return err;
-}
-
-static int check_driver_busy (struct sock *sk)
-{
- struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if);
- wanpipe_common_t *chan;
-
- if (!dev)
- return 0;
-
- dev_put(dev);
-
- if ((chan=dev->priv) == NULL)
- return 0;
-
- return atomic_read(&chan->driver_busy);
-}
-
-
-/*======================================================================
- * wanpipe_accept
- *
- * ACCEPT() System call. X25API Specific function.
- * For each incoming call, create a new socket and
- * return it to the user.
- *=====================================================================*/
-
-static int wanpipe_accept(struct socket *sock, struct socket *newsock, int flags)
-{
- struct sock *sk;
- struct sock *newsk;
- struct sk_buff *skb;
- DECLARE_WAITQUEUE(wait, current);
- int err=0;
-
- if (newsock->sk != NULL){
- wanpipe_kill_sock_accept(newsock->sk);
- newsock->sk=NULL;
- }
-
- if ((sk = sock->sk) == NULL)
- return -EINVAL;
-
- if (sk->sk_type != SOCK_RAW)
- return -EOPNOTSUPP;
-
- if (sk->sk_state != WANSOCK_LISTEN)
- return -EINVAL;
-
- if (wp_sk(sk)->num != htons(X25_PROT))
- return -EINVAL;
-
- add_wait_queue(sk->sk_sleep,&wait);
- current->state = TASK_INTERRUPTIBLE;
- for (;;){
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb){
- err=0;
- break;
- }
- if (signal_pending(current)) {
- err = -ERESTARTSYS;
- break;
- }
- schedule();
- }
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep,&wait);
-
- if (err != 0)
- return err;
-
- newsk = get_newsk_from_skb(skb);
- if (!newsk){
- return -EINVAL;
- }
-
- set_bit(1,&wanpipe_tx_critical);
- write_lock(&wanpipe_sklist_lock);
- sk_add_node(newsk, &wanpipe_sklist);
- write_unlock(&wanpipe_sklist_lock);
- clear_bit(1,&wanpipe_tx_critical);
-
- newsk->sk_socket = newsock;
- newsk->sk_sleep = &newsock->wait;
-
- /* Now attach up the new socket */
- sk->sk_ack_backlog--;
- newsock->sk = newsk;
-
- kfree_skb(skb);
-
- DBG_PRINTK(KERN_INFO "\nwansock: ACCEPT Got LCN %i\n",
- wp_sk(newsk)->lcn);
- return 0;
-}
-
-/*======================================================================
- * get_newsk_from_skb
- *
- * Accept() uses this function to get the address of the new
- * socket structure.
- *=====================================================================*/
-
-struct sock * get_newsk_from_skb (struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- wanpipe_common_t *chan;
-
- if (!dev){
- return NULL;
- }
-
- if ((chan = dev->priv) == NULL){
- return NULL;
- }
-
- if (!chan->sk){
- return NULL;
- }
- return (struct sock *)chan->sk;
-}
-
-/*======================================================================
- * wanpipe_connect
- *
- * CONNECT() System Call. X25API specific function
- * Check the state of the sock, and execute PLACE_CALL command.
- * Connect can ether block or return without waiting for connection,
- * if specified by user.
- *=====================================================================*/
-
-static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
-{
- struct sock *sk = sock->sk;
- struct wan_sockaddr_ll *addr = (struct wan_sockaddr_ll*)uaddr;
- struct net_device *dev;
- int err;
-
- if (wp_sk(sk)->num != htons(X25_PROT))
- return -EINVAL;
-
- if (sk->sk_state == WANSOCK_CONNECTED)
- return -EISCONN; /* No reconnect on a seqpacket socket */
-
- if (sk->sk_state != WAN_DISCONNECTED) {
- printk(KERN_INFO "wansock: Trying to connect on channel NON DISCONNECT\n");
- return -ECONNREFUSED;
- }
-
- sk->sk_state = WANSOCK_DISCONNECTED;
- sock->state = SS_UNCONNECTED;
-
- if (addr_len != sizeof(struct wan_sockaddr_ll))
- return -EINVAL;
-
- if (addr->sll_family != AF_WANPIPE)
- return -EINVAL;
-
- if ((dev = dev_get_by_index(sk->sk_bound_dev_if)) == NULL)
- return -ENETUNREACH;
-
- dev_put(dev);
-
- if (!sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */
- return -EINVAL;
-
- sock->state = SS_CONNECTING;
- sk->sk_state = WANSOCK_CONNECTING;
-
- if (!wp_sk(sk)->mbox) {
- if (wp_sk (sk)->svc)
- return -EINVAL;
- else {
- int err;
- if ((err=set_ioctl_cmd(sk,NULL)) < 0)
- return err;
- }
- }
-
- if ((err=wanpipe_exec_cmd(sk, X25_PLACE_CALL,flags)) != 0){
- sock->state = SS_UNCONNECTED;
- sk->sk_state = WANSOCK_CONNECTED;
- return err;
- }
-
- if (sk->sk_state != WANSOCK_CONNECTED && (flags & O_NONBLOCK)) {
- return 0;
- }
-
- if (sk->sk_state != WANSOCK_CONNECTED) {
- sock->state = SS_UNCONNECTED;
- return -ECONNREFUSED;
- }
-
- sock->state = SS_CONNECTED;
- return 0;
-}
-
-const struct proto_ops wanpipe_ops = {
- .family = PF_WANPIPE,
- .owner = THIS_MODULE,
- .release = wanpipe_release,
- .bind = wanpipe_bind,
- .connect = wanpipe_connect,
- .socketpair = sock_no_socketpair,
- .accept = wanpipe_accept,
- .getname = wanpipe_getname,
- .poll = wanpipe_poll,
- .ioctl = wanpipe_ioctl,
- .listen = wanpipe_listen,
- .shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
- .sendmsg = wanpipe_sendmsg,
- .recvmsg = wanpipe_recvmsg
-};
-
-static struct net_proto_family wanpipe_family_ops = {
- .family = PF_WANPIPE,
- .create = wanpipe_create,
- .owner = THIS_MODULE,
-};
-
-struct notifier_block wanpipe_netdev_notifier = {
- .notifier_call = wanpipe_notifier,
-};
-
-
-#ifdef MODULE
-void cleanup_module(void)
-{
- printk(KERN_INFO "wansock: Cleaning up \n");
- unregister_netdevice_notifier(&wanpipe_netdev_notifier);
- sock_unregister(PF_WANPIPE);
- proto_unregister(&wanpipe_proto);
-}
-
-int init_module(void)
-{
- int rc;
-
- printk(KERN_INFO "wansock: Registering Socket \n");
-
- rc = proto_register(&wanpipe_proto, 0);
- if (rc != 0)
- goto out;
-
- sock_register(&wanpipe_family_ops);
- register_netdevice_notifier(&wanpipe_netdev_notifier);
-out:
- return rc;
-}
-#endif
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_WANPIPE);
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 5d2d93dc0837..7a19e0ede289 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -277,8 +277,8 @@ int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
skb_push(skb, 7);
skb->data[0] = 0;
skb->data[1] = NLPID_SNAP;
- memcpy(&skb->data[2], wanrouter_oui_ether,
- sizeof(wanrouter_oui_ether));
+ skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether,
+ sizeof(wanrouter_oui_ether));
*((unsigned short*)&skb->data[5]) = htons(type);
break;
@@ -339,7 +339,7 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
skb->protocol = ethertype;
skb->pkt_type = PACKET_HOST; /* Physically point to point */
skb_pull(skb, cnt);
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
return ethertype;
}
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
new file mode 100644
index 000000000000..a228d56a91b8
--- /dev/null
+++ b/net/wireless/Kconfig
@@ -0,0 +1,16 @@
+config CFG80211
+ tristate "Improved wireless configuration API"
+
+config WIRELESS_EXT
+ bool "Wireless extensions"
+ default n
+ ---help---
+ This option enables the legacy wireless extensions
+ (wireless network interface configuration via ioctls.)
+
+ Wireless extensions will be replaced by cfg80211 and
+ will be required only by legacy drivers that implement
+ wireless extension handlers.
+
+ Say N (if you can) unless you know you need wireless
+ extensions for external modules.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
new file mode 100644
index 000000000000..3a96ae60271c
--- /dev/null
+++ b/net/wireless/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_WIRELESS_EXT) += wext.o
+obj-$(CONFIG_CFG80211) += cfg80211.o
+
+cfg80211-y += core.o sysfs.o
diff --git a/net/wireless/core.c b/net/wireless/core.c
new file mode 100644
index 000000000000..7eabd55417a5
--- /dev/null
+++ b/net/wireless/core.c
@@ -0,0 +1,224 @@
+/*
+ * This is the linux wireless configuration interface.
+ *
+ * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <linux/if.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/nl80211.h>
+#include <linux/debugfs.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <net/genetlink.h>
+#include <net/cfg80211.h>
+#include <net/wireless.h>
+#include "core.h"
+#include "sysfs.h"
+
+/* name for sysfs, %d is appended */
+#define PHY_NAME "phy"
+
+MODULE_AUTHOR("Johannes Berg");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("wireless configuration support");
+
+/* RCU might be appropriate here since we usually
+ * only read the list, and that can happen quite
+ * often because we need to do it for each command */
+LIST_HEAD(cfg80211_drv_list);
+DEFINE_MUTEX(cfg80211_drv_mutex);
+static int wiphy_counter;
+
+/* for debugfs */
+static struct dentry *ieee80211_debugfs_dir;
+
+/* exported functions */
+
+struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
+{
+ struct cfg80211_registered_device *drv;
+ int alloc_size;
+
+ alloc_size = sizeof(*drv) + sizeof_priv;
+
+ drv = kzalloc(alloc_size, GFP_KERNEL);
+ if (!drv)
+ return NULL;
+
+ drv->ops = ops;
+
+ mutex_lock(&cfg80211_drv_mutex);
+
+ drv->idx = wiphy_counter;
+
+ /* now increase counter for the next device unless
+ * it has wrapped previously */
+ if (wiphy_counter >= 0)
+ wiphy_counter++;
+
+ mutex_unlock(&cfg80211_drv_mutex);
+
+ if (unlikely(drv->idx < 0)) {
+ /* ugh, wrapped! */
+ kfree(drv);
+ return NULL;
+ }
+
+ /* give it a proper name */
+ snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE,
+ PHY_NAME "%d", drv->idx);
+
+ mutex_init(&drv->mtx);
+ mutex_init(&drv->devlist_mtx);
+ INIT_LIST_HEAD(&drv->netdev_list);
+
+ device_initialize(&drv->wiphy.dev);
+ drv->wiphy.dev.class = &ieee80211_class;
+ drv->wiphy.dev.platform_data = drv;
+
+ return &drv->wiphy;
+}
+EXPORT_SYMBOL(wiphy_new);
+
+int wiphy_register(struct wiphy *wiphy)
+{
+ struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+ int res;
+
+ mutex_lock(&cfg80211_drv_mutex);
+
+ res = device_add(&drv->wiphy.dev);
+ if (res)
+ goto out_unlock;
+
+ list_add(&drv->list, &cfg80211_drv_list);
+
+ /* add to debugfs */
+ drv->wiphy.debugfsdir =
+ debugfs_create_dir(wiphy_name(&drv->wiphy),
+ ieee80211_debugfs_dir);
+
+ res = 0;
+out_unlock:
+ mutex_unlock(&cfg80211_drv_mutex);
+ return res;
+}
+EXPORT_SYMBOL(wiphy_register);
+
+void wiphy_unregister(struct wiphy *wiphy)
+{
+ struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+
+ /* protect the device list */
+ mutex_lock(&cfg80211_drv_mutex);
+
+ BUG_ON(!list_empty(&drv->netdev_list));
+
+ /*
+ * Try to grab drv->mtx. If a command is still in progress,
+ * hopefully the driver will refuse it since it's tearing
+ * down the device already. We wait for this command to complete
+ * before unlinking the item from the list.
+ * Note: as codified by the BUG_ON above we cannot get here if
+ * a virtual interface is still associated. Hence, we can only
+ * get to lock contention here if userspace issues a command
+ * that identified the hardware by wiphy index.
+ */
+ mutex_lock(&drv->mtx);
+ /* unlock again before freeing */
+ mutex_unlock(&drv->mtx);
+
+ list_del(&drv->list);
+ device_del(&drv->wiphy.dev);
+ debugfs_remove(drv->wiphy.debugfsdir);
+
+ mutex_unlock(&cfg80211_drv_mutex);
+}
+EXPORT_SYMBOL(wiphy_unregister);
+
+void cfg80211_dev_free(struct cfg80211_registered_device *drv)
+{
+ mutex_destroy(&drv->mtx);
+ mutex_destroy(&drv->devlist_mtx);
+ kfree(drv);
+}
+
+void wiphy_free(struct wiphy *wiphy)
+{
+ put_device(&wiphy->dev);
+}
+EXPORT_SYMBOL(wiphy_free);
+
+static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
+ unsigned long state,
+ void *ndev)
+{
+ struct net_device *dev = ndev;
+ struct cfg80211_registered_device *rdev;
+
+ if (!dev->ieee80211_ptr)
+ return 0;
+
+ rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
+
+ switch (state) {
+ case NETDEV_REGISTER:
+ mutex_lock(&rdev->devlist_mtx);
+ list_add(&dev->ieee80211_ptr->list, &rdev->netdev_list);
+ if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
+ "phy80211")) {
+ printk(KERN_ERR "wireless: failed to add phy80211 "
+ "symlink to netdev!\n");
+ }
+ dev->ieee80211_ptr->netdev = dev;
+ mutex_unlock(&rdev->devlist_mtx);
+ break;
+ case NETDEV_UNREGISTER:
+ mutex_lock(&rdev->devlist_mtx);
+ if (!list_empty(&dev->ieee80211_ptr->list)) {
+ sysfs_remove_link(&dev->dev.kobj, "phy80211");
+ list_del_init(&dev->ieee80211_ptr->list);
+ }
+ mutex_unlock(&rdev->devlist_mtx);
+ break;
+ }
+
+ return 0;
+}
+
+static struct notifier_block cfg80211_netdev_notifier = {
+ .notifier_call = cfg80211_netdev_notifier_call,
+};
+
+static int cfg80211_init(void)
+{
+ int err = wiphy_sysfs_init();
+ if (err)
+ goto out_fail_sysfs;
+
+ err = register_netdevice_notifier(&cfg80211_netdev_notifier);
+ if (err)
+ goto out_fail_notifier;
+
+ ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL);
+
+ return 0;
+
+out_fail_notifier:
+ wiphy_sysfs_exit();
+out_fail_sysfs:
+ return err;
+}
+module_init(cfg80211_init);
+
+static void cfg80211_exit(void)
+{
+ debugfs_remove(ieee80211_debugfs_dir);
+ unregister_netdevice_notifier(&cfg80211_netdev_notifier);
+ wiphy_sysfs_exit();
+}
+module_exit(cfg80211_exit);
diff --git a/net/wireless/core.h b/net/wireless/core.h
new file mode 100644
index 000000000000..158db1edb92a
--- /dev/null
+++ b/net/wireless/core.h
@@ -0,0 +1,49 @@
+/*
+ * Wireless configuration interface internals.
+ *
+ * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
+ */
+#ifndef __NET_WIRELESS_CORE_H
+#define __NET_WIRELESS_CORE_H
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+#include <net/wireless.h>
+#include <net/cfg80211.h>
+
+struct cfg80211_registered_device {
+ struct cfg80211_ops *ops;
+ struct list_head list;
+ /* we hold this mutex during any call so that
+ * we cannot do multiple calls at once, and also
+ * to avoid the deregister call to proceed while
+ * any call is in progress */
+ struct mutex mtx;
+
+ /* wiphy index, internal only */
+ int idx;
+
+ /* associate netdev list */
+ struct mutex devlist_mtx;
+ struct list_head netdev_list;
+
+ /* must be last because of the way we do wiphy_priv(),
+ * and it should at least be aligned to NETDEV_ALIGN */
+ struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
+};
+
+static inline
+struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
+{
+ BUG_ON(!wiphy);
+ return container_of(wiphy, struct cfg80211_registered_device, wiphy);
+}
+
+extern struct mutex cfg80211_drv_mutex;
+extern struct list_head cfg80211_drv_list;
+
+/* free object */
+extern void cfg80211_dev_free(struct cfg80211_registered_device *drv);
+
+#endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
new file mode 100644
index 000000000000..3ebae1442963
--- /dev/null
+++ b/net/wireless/sysfs.c
@@ -0,0 +1,80 @@
+/*
+ * This file provides /sys/class/ieee80211/<wiphy name>/
+ * and some default attributes.
+ *
+ * Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This file is GPLv2 as found in COPYING.
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/nl80211.h>
+#include <linux/rtnetlink.h>
+#include <net/cfg80211.h>
+#include "sysfs.h"
+#include "core.h"
+
+static inline struct cfg80211_registered_device *dev_to_rdev(
+ struct device *dev)
+{
+ return container_of(dev, struct cfg80211_registered_device, wiphy.dev);
+}
+
+static ssize_t _show_index(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", dev_to_rdev(dev)->idx);
+}
+
+static ssize_t _show_permaddr(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ char *addr = dev_to_rdev(dev)->wiphy.perm_addr;
+
+ return sprintf(buf, "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+}
+
+static struct device_attribute ieee80211_dev_attrs[] = {
+ __ATTR(index, S_IRUGO, _show_index, NULL),
+ __ATTR(macaddress, S_IRUGO, _show_permaddr, NULL),
+ {}
+};
+
+static void wiphy_dev_release(struct device *dev)
+{
+ struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
+
+ cfg80211_dev_free(rdev);
+}
+
+static int wiphy_uevent(struct device *dev, char **envp,
+ int num_envp, char *buf, int size)
+{
+ /* TODO, we probably need stuff here */
+ return 0;
+}
+
+struct class ieee80211_class = {
+ .name = "ieee80211",
+ .owner = THIS_MODULE,
+ .dev_release = wiphy_dev_release,
+ .dev_attrs = ieee80211_dev_attrs,
+#ifdef CONFIG_HOTPLUG
+ .dev_uevent = wiphy_uevent,
+#endif
+};
+
+int wiphy_sysfs_init(void)
+{
+ return class_register(&ieee80211_class);
+}
+
+void wiphy_sysfs_exit(void)
+{
+ class_unregister(&ieee80211_class);
+}
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
new file mode 100644
index 000000000000..65acbebd3711
--- /dev/null
+++ b/net/wireless/sysfs.h
@@ -0,0 +1,9 @@
+#ifndef __WIRELESS_SYSFS_H
+#define __WIRELESS_SYSFS_H
+
+extern int wiphy_sysfs_init(void);
+extern void wiphy_sysfs_exit(void);
+
+extern struct class ieee80211_class;
+
+#endif /* __WIRELESS_SYSFS_H */
diff --git a/net/core/wireless.c b/net/wireless/wext.c
index 9936ab11e6e0..d6aaf65192e9 100644
--- a/net/core/wireless.c
+++ b/net/wireless/wext.c
@@ -2,7 +2,7 @@
* This file implement the Wireless Extensions APIs.
*
* Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved.
+ * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
*
* (As all part of the Linux kernel, this file is GPL)
*/
@@ -76,6 +76,9 @@
* o Change length in ESSID and NICK to strlen() instead of strlen()+1
* o Make standard_ioctl_num and standard_event_num unsigned
* o Remove (struct net_device *)->get_wireless_stats()
+ *
+ * v10 - 16.3.07 - Jean II
+ * o Prevent leaking of kernel space in stream on 64 bits.
*/
/***************************** INCLUDES *****************************/
@@ -94,22 +97,10 @@
#include <linux/wireless.h> /* Pretty obvious */
#include <net/iw_handler.h> /* New driver API */
#include <net/netlink.h>
+#include <net/wext.h>
#include <asm/uaccess.h> /* copy_to_user() */
-/**************************** CONSTANTS ****************************/
-
-/* Debugging stuff */
-#undef WE_IOCTL_DEBUG /* Debug IOCTL API */
-#undef WE_RTNETLINK_DEBUG /* Debug RtNetlink API */
-#undef WE_EVENT_DEBUG /* Debug Event dispatcher */
-#undef WE_SPY_DEBUG /* Debug enhanced spy support */
-
-/* Options */
-//CONFIG_NET_WIRELESS_RTNETLINK /* Wireless requests over RtNetlink */
-#define WE_EVENT_RTNETLINK /* Propagate events using RtNetlink */
-#define WE_SET_EVENT /* Generate an event on some set commands */
-
/************************* GLOBAL VARIABLES *************************/
/*
* You should not use global variables, because of re-entrancy.
@@ -346,8 +337,7 @@ static const struct iw_ioctl_description standard_ioctl[] = {
.max_tokens = sizeof(struct iw_pmksa),
},
};
-static const unsigned standard_ioctl_num = (sizeof(standard_ioctl) /
- sizeof(struct iw_ioctl_description));
+static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
/*
* Meta-data about all the additional standard Wireless Extension events
@@ -397,8 +387,7 @@ static const struct iw_ioctl_description standard_event[] = {
.max_tokens = sizeof(struct iw_pmkid_cand),
},
};
-static const unsigned standard_event_num = (sizeof(standard_event) /
- sizeof(struct iw_ioctl_description));
+static const unsigned standard_event_num = ARRAY_SIZE(standard_event);
/* Size (in bytes) of the various private data types */
static const char iw_priv_type_size[] = {
@@ -427,6 +416,21 @@ static const int event_type_size[] = {
IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */
};
+/* Size (in bytes) of various events, as packed; presumably indexed by IW_HEADER_TYPE_* like event_type_size[] - confirm */
+static const int event_type_pk_size[] = {
+ IW_EV_LCP_PK_LEN, /* IW_HEADER_TYPE_NULL */
+ 0, /* no header type is named for this slot */
+ IW_EV_CHAR_PK_LEN, /* IW_HEADER_TYPE_CHAR */
+ 0, /* no header type is named for this slot */
+ IW_EV_UINT_PK_LEN, /* IW_HEADER_TYPE_UINT */
+ IW_EV_FREQ_PK_LEN, /* IW_HEADER_TYPE_FREQ */
+ IW_EV_ADDR_PK_LEN, /* IW_HEADER_TYPE_ADDR */
+ 0, /* no header type is named for this slot */
+ IW_EV_POINT_PK_LEN, /* Without variable payload (presumably IW_HEADER_TYPE_POINT) */
+ IW_EV_PARAM_PK_LEN, /* IW_HEADER_TYPE_PARAM */
+ IW_EV_QUAL_PK_LEN, /* IW_HEADER_TYPE_QUAL */
+};
+
/************************ COMMON SUBROUTINES ************************/
/*
* Stuff that may be used in various place or doesn't fit in one
@@ -436,26 +440,24 @@ static const int event_type_size[] = {
/* ---------------------------------------------------------------- */
/*
* Return the driver handler associated with a specific Wireless Extension.
- * Called from various place, so make sure it remains efficient.
*/
-static inline iw_handler get_handler(struct net_device *dev,
- unsigned int cmd)
+static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
{
/* Don't "optimise" the following variable, it will crash */
unsigned int index; /* *MUST* be unsigned */
/* Check if we have some wireless handlers defined */
- if(dev->wireless_handlers == NULL)
+ if (dev->wireless_handlers == NULL)
return NULL;
/* Try as a standard command */
index = cmd - SIOCIWFIRST;
- if(index < dev->wireless_handlers->num_standard)
+ if (index < dev->wireless_handlers->num_standard)
return dev->wireless_handlers->standard[index];
/* Try as a private command */
index = cmd - SIOCIWFIRSTPRIV;
- if(index < dev->wireless_handlers->num_private)
+ if (index < dev->wireless_handlers->num_private)
return dev->wireless_handlers->private[index];
/* Not found */
@@ -466,15 +468,15 @@ static inline iw_handler get_handler(struct net_device *dev,
/*
* Get statistics out of the driver
*/
-static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
+static struct iw_statistics *get_wireless_stats(struct net_device *dev)
{
/* New location */
- if((dev->wireless_handlers != NULL) &&
+ if ((dev->wireless_handlers != NULL) &&
(dev->wireless_handlers->get_wireless_stats != NULL))
return dev->wireless_handlers->get_wireless_stats(dev);
/* Not found */
- return (struct iw_statistics *) NULL;
+ return NULL;
}
/* ---------------------------------------------------------------- */
@@ -496,14 +498,14 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
* netif_running(dev) test. I'm open on that one...
* Hopefully, the driver will remember to do a commit in "open()" ;-)
*/
-static inline int call_commit_handler(struct net_device * dev)
+static int call_commit_handler(struct net_device *dev)
{
- if((netif_running(dev)) &&
- (dev->wireless_handlers->standard[0] != NULL)) {
+ if ((netif_running(dev)) &&
+ (dev->wireless_handlers->standard[0] != NULL))
/* Call the commit handler on the driver */
return dev->wireless_handlers->standard[0](dev, NULL,
NULL, NULL);
- } else
+ else
return 0; /* Command completed successfully */
}
@@ -552,14 +554,13 @@ static int iw_handler_get_iwstats(struct net_device * dev,
struct iw_statistics *stats;
stats = get_wireless_stats(dev);
- if (stats != (struct iw_statistics *) NULL) {
-
+ if (stats) {
/* Copy statistics to extra */
memcpy(extra, stats, sizeof(struct iw_statistics));
wrqu->data.length = sizeof(struct iw_statistics);
/* Check if we need to clear the updated flag */
- if(wrqu->data.flags != 0)
+ if (wrqu->data.flags != 0)
stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
return 0;
} else
@@ -578,12 +579,12 @@ static int iw_handler_get_private(struct net_device * dev,
char * extra)
{
/* Check if the driver has something to export */
- if((dev->wireless_handlers->num_private_args == 0) ||
+ if ((dev->wireless_handlers->num_private_args == 0) ||
(dev->wireless_handlers->private_args == NULL))
return -EOPNOTSUPP;
/* Check if there is enough buffer up there */
- if(wrqu->data.length < dev->wireless_handlers->num_private_args) {
+ if (wrqu->data.length < dev->wireless_handlers->num_private_args) {
/* User space can't know in advance how large the buffer
* needs to be. Give it a hint, so that we can support
* any size buffer we want somewhat efficiently... */
@@ -618,8 +619,8 @@ static int iw_handler_get_private(struct net_device * dev,
/*
* Print one entry (line) of /proc/net/wireless
*/
-static __inline__ void wireless_seq_printf_stats(struct seq_file *seq,
- struct net_device *dev)
+static void wireless_seq_printf_stats(struct seq_file *seq,
+ struct net_device *dev)
{
/* Get stats from the driver */
struct iw_statistics *stats = get_wireless_stats(dev);
@@ -662,7 +663,7 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations wireless_seq_ops = {
+static const struct seq_operations wireless_seq_ops = {
.start = dev_seq_start,
.next = dev_seq_next,
.stop = dev_seq_stop,
@@ -682,7 +683,7 @@ static const struct file_operations wireless_seq_fops = {
.release = seq_release,
};
-int __init wireless_proc_init(void)
+int __init wext_proc_init(void)
{
/* Create /proc/net/wireless entry */
if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
@@ -717,32 +718,24 @@ static int ioctl_standard_call(struct net_device * dev,
int ret = -EINVAL;
/* Get the description of the IOCTL */
- if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+ if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
return -EOPNOTSUPP;
descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
- ifr->ifr_name, cmd);
- printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif /* WE_IOCTL_DEBUG */
-
/* Prepare the call */
info.cmd = cmd;
info.flags = 0;
/* Check if we have a pointer to user space data or not */
- if(descr->header_type != IW_HEADER_TYPE_POINT) {
+ if (descr->header_type != IW_HEADER_TYPE_POINT) {
/* No extra arguments. Trivial to handle */
ret = handler(dev, &info, &(iwr->u), NULL);
-#ifdef WE_SET_EVENT
/* Generate an event to notify listeners of the change */
- if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+ if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
((ret == 0) || (ret == -EIWCOMMIT)))
wireless_send_event(dev, cmd, &(iwr->u), NULL);
-#endif /* WE_SET_EVENT */
} else {
char * extra;
int extra_size;
@@ -782,19 +775,19 @@ static int ioctl_standard_call(struct net_device * dev,
iwr->u.data.length -= essid_compat;
/* Check what user space is giving us */
- if(IW_IS_SET(cmd)) {
+ if (IW_IS_SET(cmd)) {
/* Check NULL pointer */
- if((iwr->u.data.pointer == NULL) &&
+ if ((iwr->u.data.pointer == NULL) &&
(iwr->u.data.length != 0))
return -EFAULT;
/* Check if number of token fits within bounds */
- if(iwr->u.data.length > descr->max_tokens)
+ if (iwr->u.data.length > descr->max_tokens)
return -E2BIG;
- if(iwr->u.data.length < descr->min_tokens)
+ if (iwr->u.data.length < descr->min_tokens)
return -EINVAL;
} else {
/* Check NULL pointer */
- if(iwr->u.data.pointer == NULL)
+ if (iwr->u.data.pointer == NULL)
return -EFAULT;
/* Save user space buffer size for checking */
user_length = iwr->u.data.length;
@@ -804,7 +797,7 @@ static int ioctl_standard_call(struct net_device * dev,
* implied by the test at the end. */
/* Support for very large requests */
- if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
+ if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
(user_length > descr->max_tokens)) {
/* Allow userspace to GET more than max so
* we can support any size GET requests.
@@ -817,20 +810,14 @@ static int ioctl_standard_call(struct net_device * dev,
}
}
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
- dev->name, extra_size);
-#endif /* WE_IOCTL_DEBUG */
-
/* Create the kernel buffer */
/* kzalloc ensures NULL-termination for essid_compat */
extra = kzalloc(extra_size, GFP_KERNEL);
- if (extra == NULL) {
+ if (extra == NULL)
return -ENOMEM;
- }
/* If it is a SET, get all the extra data in here */
- if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+ if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
err = copy_from_user(extra, iwr->u.data.pointer,
iwr->u.data.length *
descr->token_size);
@@ -838,11 +825,6 @@ static int ioctl_standard_call(struct net_device * dev,
kfree(extra);
return -EFAULT;
}
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
- dev->name,
- iwr->u.data.length * descr->token_size);
-#endif /* WE_IOCTL_DEBUG */
}
/* Call the handler */
@@ -853,7 +835,7 @@ static int ioctl_standard_call(struct net_device * dev,
/* If we have something to return to the user */
if (!ret && IW_IS_GET(cmd)) {
/* Check if there is enough buffer up there */
- if(user_length < iwr->u.data.length) {
+ if (user_length < iwr->u.data.length) {
kfree(extra);
return -E2BIG;
}
@@ -863,18 +845,12 @@ static int ioctl_standard_call(struct net_device * dev,
descr->token_size);
if (err)
ret = -EFAULT;
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
- dev->name,
- iwr->u.data.length * descr->token_size);
-#endif /* WE_IOCTL_DEBUG */
}
-#ifdef WE_SET_EVENT
/* Generate an event to notify listeners of the change */
- if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+ if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
((ret == 0) || (ret == -EIWCOMMIT))) {
- if(descr->flags & IW_DESCR_FLAG_RESTRICT)
+ if (descr->flags & IW_DESCR_FLAG_RESTRICT)
/* If the event is restricted, don't
* export the payload */
wireless_send_event(dev, cmd, &(iwr->u), NULL);
@@ -882,14 +858,13 @@ static int ioctl_standard_call(struct net_device * dev,
wireless_send_event(dev, cmd, &(iwr->u),
extra);
}
-#endif /* WE_SET_EVENT */
/* Cleanup - I told you it wasn't that long ;-) */
kfree(extra);
}
/* Call commit handler if needed and defined */
- if(ret == -EIWCOMMIT)
+ if (ret == -EIWCOMMIT)
ret = call_commit_handler(dev);
/* Here, we will generate the appropriate event if needed */
@@ -913,10 +888,8 @@ static int ioctl_standard_call(struct net_device * dev,
* a iw_handler but process it in your ioctl handler (i.e. use the
* old driver API).
*/
-static inline int ioctl_private_call(struct net_device * dev,
- struct ifreq * ifr,
- unsigned int cmd,
- iw_handler handler)
+static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
+ unsigned int cmd, iw_handler handler)
{
struct iwreq * iwr = (struct iwreq *) ifr;
const struct iw_priv_args * descr = NULL;
@@ -926,28 +899,18 @@ static inline int ioctl_private_call(struct net_device * dev,
int ret = -EINVAL;
/* Get the description of the IOCTL */
- for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
- if(cmd == dev->wireless_handlers->private_args[i].cmd) {
+ for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
+ if (cmd == dev->wireless_handlers->private_args[i].cmd) {
descr = &(dev->wireless_handlers->private_args[i]);
break;
}
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
- ifr->ifr_name, cmd);
- if(descr) {
- printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
- dev->name, descr->name,
- descr->set_args, descr->get_args);
- }
-#endif /* WE_IOCTL_DEBUG */
-
/* Compute the size of the set/get arguments */
- if(descr != NULL) {
- if(IW_IS_SET(cmd)) {
+ if (descr != NULL) {
+ if (IW_IS_SET(cmd)) {
int offset = 0; /* For sub-ioctls */
/* Check for sub-ioctl handler */
- if(descr->name[0] == '\0')
+ if (descr->name[0] == '\0')
/* Reserve one int for sub-ioctl index */
offset = sizeof(__u32);
@@ -955,7 +918,7 @@ static inline int ioctl_private_call(struct net_device * dev,
extra_size = get_priv_size(descr->set_args);
/* Does it fits in iwr ? */
- if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
+ if ((descr->set_args & IW_PRIV_SIZE_FIXED) &&
((extra_size + offset) <= IFNAMSIZ))
extra_size = 0;
} else {
@@ -963,7 +926,7 @@ static inline int ioctl_private_call(struct net_device * dev,
extra_size = get_priv_size(descr->get_args);
/* Does it fits in iwr ? */
- if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
+ if ((descr->get_args & IW_PRIV_SIZE_FIXED) &&
(extra_size <= IFNAMSIZ))
extra_size = 0;
}
@@ -974,7 +937,7 @@ static inline int ioctl_private_call(struct net_device * dev,
info.flags = 0;
/* Check if we have a pointer to user space data or not. */
- if(extra_size == 0) {
+ if (extra_size == 0) {
/* No extra arguments. Trivial to handle */
ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
} else {
@@ -982,46 +945,33 @@ static inline int ioctl_private_call(struct net_device * dev,
int err;
/* Check what user space is giving us */
- if(IW_IS_SET(cmd)) {
+ if (IW_IS_SET(cmd)) {
/* Check NULL pointer */
- if((iwr->u.data.pointer == NULL) &&
+ if ((iwr->u.data.pointer == NULL) &&
(iwr->u.data.length != 0))
return -EFAULT;
/* Does it fits within bounds ? */
- if(iwr->u.data.length > (descr->set_args &
+ if (iwr->u.data.length > (descr->set_args &
IW_PRIV_SIZE_MASK))
return -E2BIG;
- } else {
- /* Check NULL pointer */
- if(iwr->u.data.pointer == NULL)
- return -EFAULT;
- }
-
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
- dev->name, extra_size);
-#endif /* WE_IOCTL_DEBUG */
+ } else if (iwr->u.data.pointer == NULL)
+ return -EFAULT;
/* Always allocate for max space. Easier, and won't last
* long... */
extra = kmalloc(extra_size, GFP_KERNEL);
- if (extra == NULL) {
+ if (extra == NULL)
return -ENOMEM;
- }
/* If it is a SET, get all the extra data in here */
- if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+ if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
err = copy_from_user(extra, iwr->u.data.pointer,
extra_size);
if (err) {
kfree(extra);
return -EFAULT;
}
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
- dev->name, iwr->u.data.length);
-#endif /* WE_IOCTL_DEBUG */
}
/* Call the handler */
@@ -1041,10 +991,6 @@ static inline int ioctl_private_call(struct net_device * dev,
extra_size);
if (err)
ret = -EFAULT;
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
- dev->name, iwr->u.data.length);
-#endif /* WE_IOCTL_DEBUG */
}
/* Cleanup - I told you it wasn't that long ;-) */
@@ -1053,7 +999,7 @@ static inline int ioctl_private_call(struct net_device * dev,
/* Call commit handler if needed and defined */
- if(ret == -EIWCOMMIT)
+ if (ret == -EIWCOMMIT)
ret = call_commit_handler(dev);
return ret;
@@ -1061,11 +1007,10 @@ static inline int ioctl_private_call(struct net_device * dev,
/* ---------------------------------------------------------------- */
/*
- * Main IOCTl dispatcher. Called from the main networking code
- * (dev_ioctl() in net/core/dev.c).
+ * Main IOCTl dispatcher.
* Check the type of IOCTL and call the appropriate wrapper...
*/
-int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
+static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
{
struct net_device *dev;
iw_handler handler;
@@ -1080,789 +1025,54 @@ int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
/* A bunch of special cases, then the generic case...
* Note that 'cmd' is already filtered in dev_ioctl() with
* (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
- switch(cmd)
- {
- case SIOCGIWSTATS:
- /* Get Wireless Stats */
- return ioctl_standard_call(dev,
- ifr,
- cmd,
- &iw_handler_get_iwstats);
-
- case SIOCGIWPRIV:
- /* Check if we have some wireless handlers defined */
- if(dev->wireless_handlers != NULL) {
- /* We export to user space the definition of
- * the private handler ourselves */
- return ioctl_standard_call(dev,
- ifr,
- cmd,
- &iw_handler_get_private);
- }
- // ## Fall-through for old API ##
- default:
- /* Generic IOCTL */
- /* Basic check */
- if (!netif_device_present(dev))
- return -ENODEV;
- /* New driver API : try to find the handler */
- handler = get_handler(dev, cmd);
- if(handler != NULL) {
- /* Standard and private are not the same */
- if(cmd < SIOCIWFIRSTPRIV)
- return ioctl_standard_call(dev,
- ifr,
- cmd,
- handler);
- else
- return ioctl_private_call(dev,
- ifr,
- cmd,
- handler);
- }
- /* Old driver API : call driver ioctl handler */
- if (dev->do_ioctl) {
- return dev->do_ioctl(dev, ifr, cmd);
- }
- return -EOPNOTSUPP;
- }
- /* Not reached */
- return -EINVAL;
-}
-
-/********************** RTNETLINK REQUEST API **********************/
-/*
- * The alternate user space API to configure all those Wireless Extensions
- * is through RtNetlink.
- * This API support only the new driver API (iw_handler).
- *
- * This RtNetlink API use the same query/reply model as the ioctl API.
- * Maximum effort has been done to fit in the RtNetlink model, and
- * we support both RtNetlink Set and RtNelink Get operations.
- * On the other hand, we don't offer Dump operations because of the
- * following reasons :
- * o Large number of parameters, most optional
- * o Large size of some parameters (> 100 bytes)
- * o Each parameters need to be extracted from hardware
- * o Scan requests can take seconds and disable network activity.
- * Because of this high cost/overhead, we want to return only the
- * parameters the user application is really interested in.
- * We could offer partial Dump using the IW_DESCR_FLAG_DUMP flag.
- *
- * The API uses the standard RtNetlink socket. When the RtNetlink code
- * find a IFLA_WIRELESS field in a RtNetlink SET_LINK request,
- * it calls here.
- */
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a standard Wireless Extension GET handler.
- * We do various checks and call the handler with the proper args.
- */
-static int rtnetlink_standard_get(struct net_device * dev,
- struct iw_event * request,
- int request_len,
- iw_handler handler,
- char ** p_buf,
- int * p_len)
-{
- const struct iw_ioctl_description * descr = NULL;
- unsigned int cmd;
- union iwreq_data * wrqu;
- int hdr_len;
- struct iw_request_info info;
- char * buffer = NULL;
- int buffer_size = 0;
- int ret = -EINVAL;
-
- /* Get the description of the Request */
- cmd = request->cmd;
- if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
- return -EOPNOTSUPP;
- descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Found standard handler for 0x%04X\n",
- dev->name, cmd);
- printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Check if wrqu is complete */
- hdr_len = event_type_size[descr->header_type];
- if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG
- "%s (WE.r) : Wireless request too short (%d)\n",
- dev->name, request_len);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
- /* Prepare the call */
- info.cmd = cmd;
- info.flags = 0;
-
- /* Check if we have extra data in the reply or not */
- if(descr->header_type != IW_HEADER_TYPE_POINT) {
-
- /* Create the kernel buffer that we will return.
- * It's at an offset to match the TYPE_POINT case... */
- buffer_size = request_len + IW_EV_POINT_OFF;
- buffer = kmalloc(buffer_size, GFP_KERNEL);
- if (buffer == NULL) {
- return -ENOMEM;
- }
- /* Copy event data */
- memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
- /* Use our own copy of wrqu */
- wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
- + IW_EV_LCP_LEN);
-
- /* No extra arguments. Trivial to handle */
- ret = handler(dev, &info, wrqu, NULL);
-
- } else {
- union iwreq_data wrqu_point;
- char * extra = NULL;
- int extra_size = 0;
-
- /* Get a temp copy of wrqu (skip pointer) */
- memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
- ((char *) request) + IW_EV_LCP_LEN,
- IW_EV_POINT_LEN - IW_EV_LCP_LEN);
-
- /* Calculate space needed by arguments. Always allocate
- * for max space. Easier, and won't last long... */
- extra_size = descr->max_tokens * descr->token_size;
- /* Support for very large requests */
- if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
- (wrqu_point.data.length > descr->max_tokens))
- extra_size = (wrqu_point.data.length
- * descr->token_size);
- buffer_size = extra_size + IW_EV_POINT_LEN + IW_EV_POINT_OFF;
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
- dev->name, extra_size, buffer_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Create the kernel buffer that we will return */
- buffer = kmalloc(buffer_size, GFP_KERNEL);
- if (buffer == NULL) {
- return -ENOMEM;
- }
-
- /* Put wrqu in the right place (just before extra).
- * Leave space for IWE header and dummy pointer...
- * Note that IW_EV_LCP_LEN==4 bytes, so it's still aligned...
- */
- memcpy(buffer + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
- ((char *) &wrqu_point) + IW_EV_POINT_OFF,
- IW_EV_POINT_LEN - IW_EV_LCP_LEN);
- wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_LEN);
-
- /* Extra comes logically after that. Offset +12 bytes. */
- extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_LEN;
-
- /* Call the handler */
- ret = handler(dev, &info, wrqu, extra);
-
- /* Calculate real returned length */
- extra_size = (wrqu->data.length * descr->token_size);
- /* Re-adjust reply size */
- request->len = extra_size + IW_EV_POINT_LEN;
-
- /* Put the iwe header where it should, i.e. scrap the
- * dummy pointer. */
- memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_LEN);
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Check if there is enough buffer up there */
- if(wrqu_point.data.length < wrqu->data.length)
- ret = -E2BIG;
- }
-
- /* Return the buffer to the caller */
- if (!ret) {
- *p_buf = buffer;
- *p_len = request->len;
- } else {
- /* Cleanup */
- if(buffer)
- kfree(buffer);
- }
-
- return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a standard Wireless Extension SET handler.
- * We do various checks and call the handler with the proper args.
- */
-static inline int rtnetlink_standard_set(struct net_device * dev,
- struct iw_event * request,
- int request_len,
- iw_handler handler)
-{
- const struct iw_ioctl_description * descr = NULL;
- unsigned int cmd;
- union iwreq_data * wrqu;
- union iwreq_data wrqu_point;
- int hdr_len;
- char * extra = NULL;
- int extra_size = 0;
- struct iw_request_info info;
- int ret = -EINVAL;
-
- /* Get the description of the Request */
- cmd = request->cmd;
- if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
- return -EOPNOTSUPP;
- descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Found standard SET handler for 0x%04X\n",
- dev->name, cmd);
- printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Extract fixed header from request. This is properly aligned. */
- wrqu = &request->u;
-
- /* Check if wrqu is complete */
- hdr_len = event_type_size[descr->header_type];
- if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG
- "%s (WE.r) : Wireless request too short (%d)\n",
- dev->name, request_len);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
- /* Prepare the call */
- info.cmd = cmd;
- info.flags = 0;
-
- /* Check if we have extra data in the request or not */
- if(descr->header_type != IW_HEADER_TYPE_POINT) {
-
- /* No extra arguments. Trivial to handle */
- ret = handler(dev, &info, wrqu, NULL);
-
- } else {
- int extra_len;
-
- /* Put wrqu in the right place (skip pointer) */
- memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
- wrqu, IW_EV_POINT_LEN - IW_EV_LCP_LEN);
- /* Don't forget about the event code... */
- wrqu = &wrqu_point;
-
- /* Check if number of token fits within bounds */
- if(wrqu_point.data.length > descr->max_tokens)
- return -E2BIG;
- if(wrqu_point.data.length < descr->min_tokens)
- return -EINVAL;
-
- /* Real length of payload */
- extra_len = wrqu_point.data.length * descr->token_size;
-
- /* Check if request is self consistent */
- if((request_len - hdr_len) < extra_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
- dev->name, extra_size);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
- dev->name, extra_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Always allocate for max space. Easier, and won't last
- * long... */
- extra_size = descr->max_tokens * descr->token_size;
- extra = kmalloc(extra_size, GFP_KERNEL);
- if (extra == NULL)
- return -ENOMEM;
-
- /* Copy extra in aligned buffer */
- memcpy(extra, ((char *) request) + hdr_len, extra_len);
-
- /* Call the handler */
- ret = handler(dev, &info, &wrqu_point, extra);
- }
-
-#ifdef WE_SET_EVENT
- /* Generate an event to notify listeners of the change */
- if((descr->flags & IW_DESCR_FLAG_EVENT) &&
- ((ret == 0) || (ret == -EIWCOMMIT))) {
- if(descr->flags & IW_DESCR_FLAG_RESTRICT)
- /* If the event is restricted, don't
- * export the payload */
- wireless_send_event(dev, cmd, wrqu, NULL);
- else
- wireless_send_event(dev, cmd, wrqu, extra);
- }
-#endif /* WE_SET_EVENT */
-
- /* Cleanup - I told you it wasn't that long ;-) */
- if(extra)
- kfree(extra);
-
- /* Call commit handler if needed and defined */
- if(ret == -EIWCOMMIT)
- ret = call_commit_handler(dev);
-
- return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a private Wireless Extension GET handler.
- * Same as above...
- * It's not as nice and slimline as the standard wrapper. The cause
- * is struct iw_priv_args, which was not really designed for the
- * job we are going here.
- *
- * IMPORTANT : This function prevent to set and get data on the same
- * IOCTL and enforce the SET/GET convention. Not doing it would be
- * far too hairy...
- * If you need to set and get data at the same time, please don't use
- * a iw_handler but process it in your ioctl handler (i.e. use the
- * old driver API).
- */
-static inline int rtnetlink_private_get(struct net_device * dev,
- struct iw_event * request,
- int request_len,
- iw_handler handler,
- char ** p_buf,
- int * p_len)
-{
- const struct iw_priv_args * descr = NULL;
- unsigned int cmd;
- union iwreq_data * wrqu;
- int hdr_len;
- struct iw_request_info info;
- int extra_size = 0;
- int i;
- char * buffer = NULL;
- int buffer_size = 0;
- int ret = -EINVAL;
-
- /* Get the description of the Request */
- cmd = request->cmd;
- for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
- if(cmd == dev->wireless_handlers->private_args[i].cmd) {
- descr = &(dev->wireless_handlers->private_args[i]);
- break;
- }
- if(descr == NULL)
- return -EOPNOTSUPP;
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
- dev->name, cmd);
- printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
- dev->name, descr->name, descr->set_args, descr->get_args);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Compute the max size of the get arguments */
- extra_size = get_priv_size(descr->get_args);
-
- /* Does it fits in wrqu ? */
- if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
- (extra_size <= IFNAMSIZ)) {
- hdr_len = extra_size;
- extra_size = 0;
- } else {
- hdr_len = IW_EV_POINT_LEN;
- }
-
- /* Check if wrqu is complete */
- if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG
- "%s (WE.r) : Wireless request too short (%d)\n",
- dev->name, request_len);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
- /* Prepare the call */
- info.cmd = cmd;
- info.flags = 0;
-
- /* Check if we have a pointer to user space data or not. */
- if(extra_size == 0) {
-
- /* Create the kernel buffer that we will return.
- * It's at an offset to match the TYPE_POINT case... */
- buffer_size = request_len + IW_EV_POINT_OFF;
- buffer = kmalloc(buffer_size, GFP_KERNEL);
- if (buffer == NULL) {
- return -ENOMEM;
- }
- /* Copy event data */
- memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
- /* Use our own copy of wrqu */
- wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
- + IW_EV_LCP_LEN);
-
- /* No extra arguments. Trivial to handle */
- ret = handler(dev, &info, wrqu, (char *) wrqu);
-
- } else {
- char * extra;
-
- /* Buffer for full reply */
- buffer_size = extra_size + IW_EV_POINT_LEN + IW_EV_POINT_OFF;
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
- dev->name, extra_size, buffer_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Create the kernel buffer that we will return */
- buffer = kmalloc(buffer_size, GFP_KERNEL);
- if (buffer == NULL) {
- return -ENOMEM;
- }
-
- /* Put wrqu in the right place (just before extra).
- * Leave space for IWE header and dummy pointer...
- * Note that IW_EV_LCP_LEN==4 bytes, so it's still aligned...
- */
- memcpy(buffer + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
- ((char *) request) + IW_EV_LCP_LEN,
- IW_EV_POINT_LEN - IW_EV_LCP_LEN);
- wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_LEN);
-
- /* Extra comes logically after that. Offset +12 bytes. */
- extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_LEN;
-
- /* Call the handler */
- ret = handler(dev, &info, wrqu, extra);
-
- /* Adjust for the actual length if it's variable,
- * avoid leaking kernel bits outside. */
- if (!(descr->get_args & IW_PRIV_SIZE_FIXED))
- extra_size = adjust_priv_size(descr->get_args, wrqu);
- /* Re-adjust reply size */
- request->len = extra_size + IW_EV_POINT_LEN;
-
- /* Put the iwe header where it should, i.e. scrap the
- * dummy pointer. */
- memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_LEN);
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
-#endif /* WE_RTNETLINK_DEBUG */
- }
-
- /* Return the buffer to the caller */
- if (!ret) {
- *p_buf = buffer;
- *p_len = request->len;
- } else {
- /* Cleanup */
- if(buffer)
- kfree(buffer);
- }
-
- return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a private Wireless Extension SET handler.
- * Same as above...
- * It's not as nice and slimline as the standard wrapper. The cause
- * is struct iw_priv_args, which was not really designed for the
- * job we are going here.
- *
- * IMPORTANT : This function prevent to set and get data on the same
- * IOCTL and enforce the SET/GET convention. Not doing it would be
- * far too hairy...
- * If you need to set and get data at the same time, please don't use
- * a iw_handler but process it in your ioctl handler (i.e. use the
- * old driver API).
- */
-static inline int rtnetlink_private_set(struct net_device * dev,
- struct iw_event * request,
- int request_len,
- iw_handler handler)
-{
- const struct iw_priv_args * descr = NULL;
- unsigned int cmd;
- union iwreq_data * wrqu;
- union iwreq_data wrqu_point;
- int hdr_len;
- char * extra = NULL;
- int extra_size = 0;
- int offset = 0; /* For sub-ioctls */
- struct iw_request_info info;
- int i;
- int ret = -EINVAL;
-
- /* Get the description of the Request */
- cmd = request->cmd;
- for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
- if(cmd == dev->wireless_handlers->private_args[i].cmd) {
- descr = &(dev->wireless_handlers->private_args[i]);
- break;
- }
- if(descr == NULL)
- return -EOPNOTSUPP;
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
- ifr->ifr_name, cmd);
- printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
- dev->name, descr->name, descr->set_args, descr->get_args);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Compute the size of the set arguments */
- /* Check for sub-ioctl handler */
- if(descr->name[0] == '\0')
- /* Reserve one int for sub-ioctl index */
- offset = sizeof(__u32);
-
- /* Size of set arguments */
- extra_size = get_priv_size(descr->set_args);
-
- /* Does it fits in wrqu ? */
- if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
- (extra_size <= IFNAMSIZ)) {
- hdr_len = IW_EV_LCP_LEN + extra_size;
- extra_size = 0;
- } else {
- hdr_len = IW_EV_POINT_LEN;
- }
+ if (cmd == SIOCGIWSTATS)
+ return ioctl_standard_call(dev, ifr, cmd,
+ &iw_handler_get_iwstats);
- /* Extract fixed header from request. This is properly aligned. */
- wrqu = &request->u;
-
- /* Check if wrqu is complete */
- if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG
- "%s (WE.r) : Wireless request too short (%d)\n",
- dev->name, request_len);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
- /* Prepare the call */
- info.cmd = cmd;
- info.flags = 0;
-
- /* Check if we have a pointer to user space data or not. */
- if(extra_size == 0) {
-
- /* No extra arguments. Trivial to handle */
- ret = handler(dev, &info, wrqu, (char *) wrqu);
-
- } else {
- int extra_len;
-
- /* Put wrqu in the right place (skip pointer) */
- memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
- wrqu, IW_EV_POINT_LEN - IW_EV_LCP_LEN);
-
- /* Does it fits within bounds ? */
- if(wrqu_point.data.length > (descr->set_args &
- IW_PRIV_SIZE_MASK))
- return -E2BIG;
-
- /* Real length of payload */
- extra_len = adjust_priv_size(descr->set_args, &wrqu_point);
-
- /* Check if request is self consistent */
- if((request_len - hdr_len) < extra_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
- dev->name, extra_size);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
- dev->name, extra_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Always allocate for max space. Easier, and won't last
- * long... */
- extra = kmalloc(extra_size, GFP_KERNEL);
- if (extra == NULL)
- return -ENOMEM;
-
- /* Copy extra in aligned buffer */
- memcpy(extra, ((char *) request) + hdr_len, extra_len);
-
- /* Call the handler */
- ret = handler(dev, &info, &wrqu_point, extra);
-
- /* Cleanup - I told you it wasn't that long ;-) */
- kfree(extra);
- }
-
- /* Call commit handler if needed and defined */
- if(ret == -EIWCOMMIT)
- ret = call_commit_handler(dev);
-
- return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Main RtNetlink dispatcher. Called from the main networking code
- * (do_getlink() in net/core/rtnetlink.c).
- * Check the type of Request and call the appropriate wrapper...
- */
-int wireless_rtnetlink_get(struct net_device * dev,
- char * data,
- int len,
- char ** p_buf,
- int * p_len)
-{
- struct iw_event * request = (struct iw_event *) data;
- iw_handler handler;
-
- /* Check length */
- if(len < IW_EV_LCP_LEN) {
- printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
- dev->name, len);
- return -EINVAL;
- }
-
- /* ReCheck length (len may have padding) */
- if(request->len > len) {
- printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
- dev->name, request->len, len);
- return -EINVAL;
- }
-
- /* Only accept GET requests in here */
- if(!IW_IS_GET(request->cmd))
- return -EOPNOTSUPP;
-
- /* If command is `get the encoding parameters', check if
- * the user has the right to do it */
- if (request->cmd == SIOCGIWENCODE ||
- request->cmd == SIOCGIWENCODEEXT) {
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- }
-
- /* Special cases */
- if(request->cmd == SIOCGIWSTATS)
- /* Get Wireless Stats */
- return rtnetlink_standard_get(dev,
- request,
- request->len,
- &iw_handler_get_iwstats,
- p_buf, p_len);
- if(request->cmd == SIOCGIWPRIV) {
- /* Check if we have some wireless handlers defined */
- if(dev->wireless_handlers == NULL)
- return -EOPNOTSUPP;
- /* Get Wireless Stats */
- return rtnetlink_standard_get(dev,
- request,
- request->len,
- &iw_handler_get_private,
- p_buf, p_len);
- }
+ if (cmd == SIOCGIWPRIV && dev->wireless_handlers)
+ return ioctl_standard_call(dev, ifr, cmd,
+ &iw_handler_get_private);
/* Basic check */
if (!netif_device_present(dev))
return -ENODEV;
- /* Try to find the handler */
- handler = get_handler(dev, request->cmd);
- if(handler != NULL) {
+ /* New driver API : try to find the handler */
+ handler = get_handler(dev, cmd);
+ if (handler) {
/* Standard and private are not the same */
- if(request->cmd < SIOCIWFIRSTPRIV)
- return rtnetlink_standard_get(dev,
- request,
- request->len,
- handler,
- p_buf, p_len);
+ if (cmd < SIOCIWFIRSTPRIV)
+ return ioctl_standard_call(dev, ifr, cmd, handler);
else
- return rtnetlink_private_get(dev,
- request,
- request->len,
- handler,
- p_buf, p_len);
+ return ioctl_private_call(dev, ifr, cmd, handler);
}
-
+ /* Old driver API : call driver ioctl handler */
+ if (dev->do_ioctl)
+ return dev->do_ioctl(dev, ifr, cmd);
return -EOPNOTSUPP;
}
-/* ---------------------------------------------------------------- */
-/*
- * Main RtNetlink dispatcher. Called from the main networking code
- * (do_setlink() in net/core/rtnetlink.c).
- * Check the type of Request and call the appropriate wrapper...
- */
-int wireless_rtnetlink_set(struct net_device * dev,
- char * data,
- int len)
+/* entry point from dev ioctl */
+int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
+ void __user *arg)
{
- struct iw_event * request = (struct iw_event *) data;
- iw_handler handler;
-
- /* Check length */
- if(len < IW_EV_LCP_LEN) {
- printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
- dev->name, len);
- return -EINVAL;
- }
-
- /* ReCheck length (len may have padding) */
- if(request->len > len) {
- printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
- dev->name, request->len, len);
- return -EINVAL;
- }
-
- /* Only accept SET requests in here */
- if(!IW_IS_SET(request->cmd))
- return -EOPNOTSUPP;
-
- /* Basic check */
- if (!netif_device_present(dev))
- return -ENODEV;
-
- /* New driver API : try to find the handler */
- handler = get_handler(dev, request->cmd);
- if(handler != NULL) {
- /* Standard and private are not the same */
- if(request->cmd < SIOCIWFIRSTPRIV)
- return rtnetlink_standard_set(dev,
- request,
- request->len,
- handler);
- else
- return rtnetlink_private_set(dev,
- request,
- request->len,
- handler);
- }
+ int ret;
- return -EOPNOTSUPP;
+ /* If command is `set a parameter', or
+ * `get the encoding parameters', check if
+ * the user has the right to do it */
+ if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT)
+ && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ dev_load(ifr->ifr_name);
+ rtnl_lock();
+ ret = wireless_process_ioctl(ifr, cmd);
+ rtnl_unlock();
+ if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return ret;
}
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
-
/************************* EVENT PROCESSING *************************/
/*
@@ -1870,7 +1080,6 @@ int wireless_rtnetlink_set(struct net_device * dev,
* Most often, the event will be propagated through rtnetlink
*/
-#ifdef WE_EVENT_RTNETLINK
/* ---------------------------------------------------------------- */
/*
* Locking...
@@ -1915,15 +1124,12 @@ static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
* current wireless config. Dumping the wireless config is far too
* expensive (for each parameter, the driver need to query the hardware).
*/
-static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb,
- struct net_device * dev,
- int type,
- char * event,
- int event_len)
+static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev,
+ int type, char *event, int event_len)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
r = NLMSG_DATA(nlh);
@@ -1937,12 +1143,12 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb,
/* Add the wireless events in the netlink packet */
RTA_PUT(skb, IFLA_WIRELESS, event_len, event);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1953,9 +1159,7 @@ rtattr_failure:
* Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field
* within a RTM_NEWLINK event.
*/
-static inline void rtmsg_iwinfo(struct net_device * dev,
- char * event,
- int event_len)
+static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len)
{
struct sk_buff *skb;
int size = NLMSG_GOODSIZE;
@@ -1974,8 +1178,6 @@ static inline void rtmsg_iwinfo(struct net_device * dev,
tasklet_schedule(&wireless_nlevent_tasklet);
}
-#endif /* WE_EVENT_RTNETLINK */
-
/* ---------------------------------------------------------------- */
/*
* Main event dispatcher. Called from other parts and drivers.
@@ -1997,17 +1199,17 @@ void wireless_send_event(struct net_device * dev,
unsigned cmd_index; /* *MUST* be unsigned */
/* Get the description of the Event */
- if(cmd <= SIOCIWLAST) {
+ if (cmd <= SIOCIWLAST) {
cmd_index = cmd - SIOCIWFIRST;
- if(cmd_index < standard_ioctl_num)
+ if (cmd_index < standard_ioctl_num)
descr = &(standard_ioctl[cmd_index]);
} else {
cmd_index = cmd - IWEVFIRST;
- if(cmd_index < standard_event_num)
+ if (cmd_index < standard_event_num)
descr = &(standard_event[cmd_index]);
}
/* Don't accept unknown events */
- if(descr == NULL) {
+ if (descr == NULL) {
/* Note : we don't return an error to the driver, because
* the driver would not know what to do about it. It can't
* return an error to the user, because the event is not
@@ -2019,63 +1221,50 @@ void wireless_send_event(struct net_device * dev,
dev->name, cmd);
return;
}
-#ifdef WE_EVENT_DEBUG
- printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
- dev->name, cmd);
- printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif /* WE_EVENT_DEBUG */
/* Check extra parameters and set extra_len */
- if(descr->header_type == IW_HEADER_TYPE_POINT) {
+ if (descr->header_type == IW_HEADER_TYPE_POINT) {
/* Check if number of token fits within bounds */
- if(wrqu->data.length > descr->max_tokens) {
+ if (wrqu->data.length > descr->max_tokens) {
printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
return;
}
- if(wrqu->data.length < descr->min_tokens) {
+ if (wrqu->data.length < descr->min_tokens) {
printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
return;
}
/* Calculate extra_len - extra is NULL for restricted events */
- if(extra != NULL)
+ if (extra != NULL)
extra_len = wrqu->data.length * descr->token_size;
/* Always at an offset in wrqu */
wrqu_off = IW_EV_POINT_OFF;
-#ifdef WE_EVENT_DEBUG
- printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
-#endif /* WE_EVENT_DEBUG */
}
/* Total length of the event */
hdr_len = event_type_size[descr->header_type];
event_len = hdr_len + extra_len;
-#ifdef WE_EVENT_DEBUG
- printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len);
-#endif /* WE_EVENT_DEBUG */
-
/* Create temporary buffer to hold the event */
event = kmalloc(event_len, GFP_ATOMIC);
- if(event == NULL)
+ if (event == NULL)
return;
/* Fill event */
event->len = event_len;
event->cmd = cmd;
memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN);
- if(extra != NULL)
+ if (extra)
memcpy(((char *) event) + hdr_len, extra, extra_len);
-#ifdef WE_EVENT_RTNETLINK
/* Send via the RtNetlink event channel */
rtmsg_iwinfo(dev, (char *) event, event_len);
-#endif /* WE_EVENT_RTNETLINK */
/* Cleanup */
kfree(event);
return; /* Always success, I guess ;-) */
}
+EXPORT_SYMBOL(wireless_send_event);
/********************** ENHANCED IWSPY SUPPORT **********************/
/*
@@ -2095,11 +1284,11 @@ void wireless_send_event(struct net_device * dev,
* Because this is called on the Rx path via wireless_spy_update(),
* we want it to be efficient...
*/
-static inline struct iw_spy_data * get_spydata(struct net_device *dev)
+static inline struct iw_spy_data *get_spydata(struct net_device *dev)
{
/* This is the new way */
- if(dev->wireless_data)
- return(dev->wireless_data->spy_data);
+ if (dev->wireless_data)
+ return dev->wireless_data->spy_data;
return NULL;
}
@@ -2116,7 +1305,7 @@ int iw_handler_set_spy(struct net_device * dev,
struct sockaddr * address = (struct sockaddr *) extra;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return -EOPNOTSUPP;
/* Disable spy collection while we copy the addresses.
@@ -2133,29 +1322,16 @@ int iw_handler_set_spy(struct net_device * dev,
smp_wmb();
/* Are there are addresses to copy? */
- if(wrqu->data.length > 0) {
+ if (wrqu->data.length > 0) {
int i;
/* Copy addresses */
- for(i = 0; i < wrqu->data.length; i++)
+ for (i = 0; i < wrqu->data.length; i++)
memcpy(spydata->spy_address[i], address[i].sa_data,
ETH_ALEN);
/* Reset stats */
memset(spydata->spy_stat, 0,
sizeof(struct iw_quality) * IW_MAX_SPY);
-
-#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "iw_handler_set_spy() : wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length);
- for (i = 0; i < wrqu->data.length; i++)
- printk(KERN_DEBUG
- "%02X:%02X:%02X:%02X:%02X:%02X \n",
- spydata->spy_address[i][0],
- spydata->spy_address[i][1],
- spydata->spy_address[i][2],
- spydata->spy_address[i][3],
- spydata->spy_address[i][4],
- spydata->spy_address[i][5]);
-#endif /* WE_SPY_DEBUG */
}
/* Make sure above is updated before re-enabling */
@@ -2166,6 +1342,7 @@ int iw_handler_set_spy(struct net_device * dev,
return 0;
}
+EXPORT_SYMBOL(iw_handler_set_spy);
/*------------------------------------------------------------------*/
/*
@@ -2181,26 +1358,27 @@ int iw_handler_get_spy(struct net_device * dev,
int i;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return -EOPNOTSUPP;
wrqu->data.length = spydata->spy_number;
/* Copy addresses. */
- for(i = 0; i < spydata->spy_number; i++) {
+ for (i = 0; i < spydata->spy_number; i++) {
memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
address[i].sa_family = AF_UNIX;
}
/* Copy stats to the user buffer (just after). */
- if(spydata->spy_number > 0)
+ if (spydata->spy_number > 0)
memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number),
spydata->spy_stat,
sizeof(struct iw_quality) * spydata->spy_number);
/* Reset updated flags. */
- for(i = 0; i < spydata->spy_number; i++)
+ for (i = 0; i < spydata->spy_number; i++)
spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
return 0;
}
+EXPORT_SYMBOL(iw_handler_get_spy);
/*------------------------------------------------------------------*/
/*
@@ -2215,7 +1393,7 @@ int iw_handler_set_thrspy(struct net_device * dev,
struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return -EOPNOTSUPP;
/* Just do it */
@@ -2225,12 +1403,9 @@ int iw_handler_set_thrspy(struct net_device * dev,
/* Clear flag */
memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
-#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "iw_handler_set_thrspy() : low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level);
-#endif /* WE_SPY_DEBUG */
-
return 0;
}
+EXPORT_SYMBOL(iw_handler_set_thrspy);
/*------------------------------------------------------------------*/
/*
@@ -2245,7 +1420,7 @@ int iw_handler_get_thrspy(struct net_device * dev,
struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return -EOPNOTSUPP;
/* Just do it */
@@ -2254,6 +1429,7 @@ int iw_handler_get_thrspy(struct net_device * dev,
return 0;
}
+EXPORT_SYMBOL(iw_handler_get_thrspy);
/*------------------------------------------------------------------*/
/*
@@ -2279,16 +1455,6 @@ static void iw_send_thrspy_event(struct net_device * dev,
memcpy(&(threshold.low), &(spydata->spy_thr_low),
2 * sizeof(struct iw_quality));
-#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n",
- threshold.addr.sa_data[0],
- threshold.addr.sa_data[1],
- threshold.addr.sa_data[2],
- threshold.addr.sa_data[3],
- threshold.addr.sa_data[4],
- threshold.addr.sa_data[5], threshold.qual.level);
-#endif /* WE_SPY_DEBUG */
-
/* Send event to user space */
wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
}
@@ -2309,16 +1475,12 @@ void wireless_spy_update(struct net_device * dev,
int match = -1;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return;
-#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "wireless_spy_update() : wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
-#endif /* WE_SPY_DEBUG */
-
/* Update all records that match */
- for(i = 0; i < spydata->spy_number; i++)
- if(!compare_ether_addr(address, spydata->spy_address[i])) {
+ for (i = 0; i < spydata->spy_number; i++)
+ if (!compare_ether_addr(address, spydata->spy_address[i])) {
memcpy(&(spydata->spy_stat[i]), wstats,
sizeof(struct iw_quality));
match = i;
@@ -2328,15 +1490,15 @@ void wireless_spy_update(struct net_device * dev,
* To avoid event storms, we have a simple hysteresis : we generate
* event only when we go under the low threshold or above the
* high threshold. */
- if(match >= 0) {
- if(spydata->spy_thr_under[match]) {
- if(wstats->level > spydata->spy_thr_high.level) {
+ if (match >= 0) {
+ if (spydata->spy_thr_under[match]) {
+ if (wstats->level > spydata->spy_thr_high.level) {
spydata->spy_thr_under[match] = 0;
iw_send_thrspy_event(dev, spydata,
address, wstats);
}
} else {
- if(wstats->level < spydata->spy_thr_low.level) {
+ if (wstats->level < spydata->spy_thr_low.level) {
spydata->spy_thr_under[match] = 1;
iw_send_thrspy_event(dev, spydata,
address, wstats);
@@ -2344,10 +1506,4 @@ void wireless_spy_update(struct net_device * dev,
}
}
}
-
-EXPORT_SYMBOL(iw_handler_get_spy);
-EXPORT_SYMBOL(iw_handler_get_thrspy);
-EXPORT_SYMBOL(iw_handler_set_spy);
-EXPORT_SYMBOL(iw_handler_set_thrspy);
-EXPORT_SYMBOL(wireless_send_event);
EXPORT_SYMBOL(wireless_spy_update);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index e62ba41b05c5..479927cb45ca 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -951,7 +951,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
* Incoming Call User Data.
*/
if (skb->len >= 0) {
- memcpy(makex25->calluserdata.cuddata, skb->data, skb->len);
+ skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
makex25->calluserdata.cudlength = skb->len;
}
@@ -1058,9 +1058,10 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
*/
SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n");
- asmptr = skb->h.raw = skb_put(skb, len);
+ skb_reset_transport_header(skb);
+ skb_put(skb, len);
- rc = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ rc = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (rc)
goto out_kfree_skb;
@@ -1210,8 +1211,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
}
}
- skb->h.raw = skb->data;
-
+ skb_reset_transport_header(skb);
copied = skb->len;
if (copied > size) {
@@ -1280,6 +1280,12 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = sock_get_timestamp(sk,
(struct timeval __user *)argp);
break;
+ case SIOCGSTAMPNS:
+ rc = -EINVAL;
+ if (sk)
+ rc = sock_get_timestampns(sk,
+ (struct timespec __user *)argp);
+ break;
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
@@ -1521,6 +1527,12 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
rc = compat_sock_get_timestamp(sk,
(struct timeval __user*)argp);
break;
+ case SIOCGSTAMPNS:
+ rc = -EINVAL;
+ if (sk)
+ rc = compat_sock_get_timestampns(sk,
+ (struct timespec __user*)argp);
+ break;
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
@@ -1593,7 +1605,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = {
.sendpage = sock_no_sendpage,
};
-#include <linux/smp_lock.h>
SOCKOPS_WRAP(x25_proto, AF_X25);
static struct packet_type x25_packet_type = {
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index c7221de98a95..848a6b6f90a6 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -48,7 +48,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
if ((sk = x25_find_socket(lci, nb)) != NULL) {
int queued = 1;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
queued = x25_process_rx_frame(sk, skb);
@@ -191,7 +191,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
{
unsigned char *dptr;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
switch (nb->dev->type) {
case ARPHRD_X25:
diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
index d339e0c810a8..8738ec7ce693 100644
--- a/net/x25/x25_forward.c
+++ b/net/x25/x25_forward.c
@@ -26,64 +26,66 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
short same_lci = 0;
int rc = 0;
- if ((rt = x25_get_route(dest_addr)) != NULL) {
+ if ((rt = x25_get_route(dest_addr)) == NULL)
+ goto out_no_route;
- if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) {
- /* This shouldnt happen, if it occurs somehow
- * do something sensible
- */
- goto out_put_route;
- }
-
- /* Avoid a loop. This is the normal exit path for a
- * system with only one x.25 iface and default route
+ if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) {
+ /* This shouldnt happen, if it occurs somehow
+ * do something sensible
*/
- if (rt->dev == from->dev) {
- goto out_put_nb;
- }
+ goto out_put_route;
+ }
- /* Remote end sending a call request on an already
- * established LCI? It shouldnt happen, just in case..
- */
- read_lock_bh(&x25_forward_list_lock);
- list_for_each(entry, &x25_forward_list) {
- x25_frwd = list_entry(entry, struct x25_forward, node);
- if (x25_frwd->lci == lci) {
- printk(KERN_WARNING "X.25: call request for lci which is already registered!, transmitting but not registering new pair\n");
- same_lci = 1;
- }
- }
- read_unlock_bh(&x25_forward_list_lock);
-
- /* Save the forwarding details for future traffic */
- if (!same_lci){
- if ((new_frwd = kmalloc(sizeof(struct x25_forward),
- GFP_ATOMIC)) == NULL){
- rc = -ENOMEM;
- goto out_put_nb;
- }
- new_frwd->lci = lci;
- new_frwd->dev1 = rt->dev;
- new_frwd->dev2 = from->dev;
- write_lock_bh(&x25_forward_list_lock);
- list_add(&new_frwd->node, &x25_forward_list);
- write_unlock_bh(&x25_forward_list_lock);
+ /* Avoid a loop. This is the normal exit path for a
+ * system with only one x.25 iface and default route
+ */
+ if (rt->dev == from->dev) {
+ goto out_put_nb;
+ }
+
+ /* Remote end sending a call request on an already
+ * established LCI? It shouldnt happen, just in case..
+ */
+ read_lock_bh(&x25_forward_list_lock);
+ list_for_each(entry, &x25_forward_list) {
+ x25_frwd = list_entry(entry, struct x25_forward, node);
+ if (x25_frwd->lci == lci) {
+ printk(KERN_WARNING "X.25: call request for lci which is already registered!, transmitting but not registering new pair\n");
+ same_lci = 1;
}
+ }
+ read_unlock_bh(&x25_forward_list_lock);
- /* Forward the call request */
- if ( (skbn = skb_clone(skb, GFP_ATOMIC)) == NULL){
+ /* Save the forwarding details for future traffic */
+ if (!same_lci){
+ if ((new_frwd = kmalloc(sizeof(struct x25_forward),
+ GFP_ATOMIC)) == NULL){
+ rc = -ENOMEM;
goto out_put_nb;
}
- x25_transmit_link(skbn, neigh_new);
- rc = 1;
+ new_frwd->lci = lci;
+ new_frwd->dev1 = rt->dev;
+ new_frwd->dev2 = from->dev;
+ write_lock_bh(&x25_forward_list_lock);
+ list_add(&new_frwd->node, &x25_forward_list);
+ write_unlock_bh(&x25_forward_list_lock);
}
+ /* Forward the call request */
+ if ( (skbn = skb_clone(skb, GFP_ATOMIC)) == NULL){
+ goto out_put_nb;
+ }
+ x25_transmit_link(skbn, neigh_new);
+ rc = 1;
+
out_put_nb:
x25_neigh_put(neigh_new);
out_put_route:
x25_route_put(rt);
+
+out_no_route:
return rc;
}
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index c5239fcdefa0..1c88762c2794 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -53,17 +53,20 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
skb_queue_tail(&x25->fragment_queue, skb);
- skbn->h.raw = skbn->data;
+ skb_reset_transport_header(skbn);
skbo = skb_dequeue(&x25->fragment_queue);
- memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+ skb_copy_from_linear_data(skbo, skb_put(skbn, skbo->len),
+ skbo->len);
kfree_skb(skbo);
while ((skbo =
skb_dequeue(&x25->fragment_queue)) != NULL) {
skb_pull(skbo, (x25->neighbour->extended) ?
X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
- memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+ skb_copy_from_linear_data(skbo,
+ skb_put(skbn, skbo->len),
+ skbo->len);
kfree_skb(skbo);
}
@@ -112,8 +115,9 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
* Copy any Call User Data.
*/
if (skb->len >= 0) {
- memcpy(x25->calluserdata.cuddata, skb->data,
- skb->len);
+ skb_copy_from_linear_data(skb,
+ x25->calluserdata.cuddata,
+ skb->len);
x25->calluserdata.cudlength = skb->len;
}
if (!sock_flag(sk, SOCK_DEAD))
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 6f5737853912..2b96b52114d6 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -61,7 +61,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
if (skb->len - header_len > max_len) {
/* Save a copy of the Header */
- memcpy(header, skb->data, header_len);
+ skb_copy_from_linear_data(skb, header, header_len);
skb_pull(skb, header_len);
frontlen = skb_headroom(skb);
@@ -84,12 +84,12 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
len = max_len > skb->len ? skb->len : max_len;
/* Copy the user data */
- memcpy(skb_put(skbn, len), skb->data, len);
+ skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
skb_pull(skb, len);
/* Duplicate the Header */
skb_push(skbn, header_len);
- memcpy(skbn->data, header, header_len);
+ skb_copy_to_linear_data(skbn, header, header_len);
if (skb->len > 0) {
if (x25->neighbour->extended)
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index f373a8a7d9c8..6249a9405bb8 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -612,175 +612,6 @@ EXPORT_SYMBOL_GPL(skb_icv_walk);
#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
-/* Looking generic it is not used in another places. */
-
-int
-skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
-{
- int start = skb_headlen(skb);
- int i, copy = start - offset;
- int elt = 0;
-
- if (copy > 0) {
- if (copy > len)
- copy = len;
- sg[elt].page = virt_to_page(skb->data + offset);
- sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
- sg[elt].length = copy;
- elt++;
- if ((len -= copy) == 0)
- return elt;
- offset += copy;
- }
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- int end;
-
- BUG_TRAP(start <= offset + len);
-
- end = start + skb_shinfo(skb)->frags[i].size;
- if ((copy = end - offset) > 0) {
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
- if (copy > len)
- copy = len;
- sg[elt].page = frag->page;
- sg[elt].offset = frag->page_offset+offset-start;
- sg[elt].length = copy;
- elt++;
- if (!(len -= copy))
- return elt;
- offset += copy;
- }
- start = end;
- }
-
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- for (; list; list = list->next) {
- int end;
-
- BUG_TRAP(start <= offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
- if ((len -= copy) == 0)
- return elt;
- offset += copy;
- }
- start = end;
- }
- }
- BUG_ON(len);
- return elt;
-}
-EXPORT_SYMBOL_GPL(skb_to_sgvec);
-
-/* Check that skb data bits are writable. If they are not, copy data
- * to newly created private area. If "tailbits" is given, make sure that
- * tailbits bytes beyond current end of skb are writable.
- *
- * Returns amount of elements of scatterlist to load for subsequent
- * transformations and pointer to writable trailer skb.
- */
-
-int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
-{
- int copyflag;
- int elt;
- struct sk_buff *skb1, **skb_p;
-
- /* If skb is cloned or its head is paged, reallocate
- * head pulling out all the pages (pages are considered not writable
- * at the moment even if they are anonymous).
- */
- if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
- __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
- return -ENOMEM;
-
- /* Easy case. Most of packets will go this way. */
- if (!skb_shinfo(skb)->frag_list) {
- /* A little of trouble, not enough of space for trailer.
- * This should not happen, when stack is tuned to generate
- * good frames. OK, on miss we reallocate and reserve even more
- * space, 128 bytes is fair. */
-
- if (skb_tailroom(skb) < tailbits &&
- pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
- return -ENOMEM;
-
- /* Voila! */
- *trailer = skb;
- return 1;
- }
-
- /* Misery. We are in troubles, going to mincer fragments... */
-
- elt = 1;
- skb_p = &skb_shinfo(skb)->frag_list;
- copyflag = 0;
-
- while ((skb1 = *skb_p) != NULL) {
- int ntail = 0;
-
- /* The fragment is partially pulled by someone,
- * this can happen on input. Copy it and everything
- * after it. */
-
- if (skb_shared(skb1))
- copyflag = 1;
-
- /* If the skb is the last, worry about trailer. */
-
- if (skb1->next == NULL && tailbits) {
- if (skb_shinfo(skb1)->nr_frags ||
- skb_shinfo(skb1)->frag_list ||
- skb_tailroom(skb1) < tailbits)
- ntail = tailbits + 128;
- }
-
- if (copyflag ||
- skb_cloned(skb1) ||
- ntail ||
- skb_shinfo(skb1)->nr_frags ||
- skb_shinfo(skb1)->frag_list) {
- struct sk_buff *skb2;
-
- /* Fuck, we are miserable poor guys... */
- if (ntail == 0)
- skb2 = skb_copy(skb1, GFP_ATOMIC);
- else
- skb2 = skb_copy_expand(skb1,
- skb_headroom(skb1),
- ntail,
- GFP_ATOMIC);
- if (unlikely(skb2 == NULL))
- return -ENOMEM;
-
- if (skb1->sk)
- skb_set_owner_w(skb2, skb1->sk);
-
- /* Looking around. Are we still alive?
- * OK, link new skb, drop old one */
-
- skb2->next = skb1->next;
- *skb_p = skb2;
- kfree_skb(skb1);
- skb1 = skb2;
- }
- elt++;
- *trailer = skb1;
- skb_p = &skb1->next;
- }
-
- return elt;
-}
-EXPORT_SYMBOL_GPL(skb_cow_data);
-
void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
{
if (tail != skb) {
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ee15bdae1419..5c4695840c58 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -62,7 +62,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
case IPPROTO_COMP:
if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
return -EINVAL;
- *spi = htonl(ntohs(*(__be16*)(skb->h.raw + 2)));
+ *spi = htonl(ntohs(*(__be16*)(skb_transport_header(skb) + 2)));
*seq = 0;
return 0;
default:
@@ -72,8 +72,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
if (!pskb_may_pull(skb, 16))
return -EINVAL;
- *spi = *(__be32*)(skb->h.raw + offset);
- *seq = *(__be32*)(skb->h.raw + offset_seq);
+ *spi = *(__be32*)(skb_transport_header(skb) + offset);
+ *seq = *(__be32*)(skb_transport_header(skb) + offset_seq);
return 0;
}
EXPORT_SYMBOL(xfrm_parse_spi);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0c3a70ac5075..95271e8426a1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -268,7 +268,7 @@ static inline unsigned long make_jiffies(long secs)
static void xfrm_policy_timer(unsigned long data)
{
struct xfrm_policy *xp = (struct xfrm_policy*)data;
- unsigned long now = (unsigned long)xtime.tv_sec;
+ unsigned long now = get_seconds();
long next = LONG_MAX;
int warn = 0;
int dir;
@@ -579,8 +579,22 @@ static inline int xfrm_byidx_should_resize(int total)
return 0;
}
-static DEFINE_MUTEX(hash_resize_mutex);
+/*
+ * Fill @si with the current policy database figures.  Every value is
+ * read while holding xfrm_policy_lock for reading with bottom halves
+ * disabled.
+ */
+void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
+{
+	read_lock_bh(&xfrm_policy_lock);
+
+	/* Hash table figures. */
+	si->spdhcnt = xfrm_idx_hmask;
+	si->spdhmcnt = xfrm_policy_hashmax;
+
+	/* Counters kept at the plain direction indices. */
+	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
+	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
+	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
+
+	/*
+	 * Counters kept XFRM_POLICY_MAX slots further on -- presumably the
+	 * per-socket policies, which xfrm_sk_policy_insert() links at
+	 * XFRM_POLICY_MAX + dir.
+	 */
+	si->inscnt = xfrm_policy_count[XFRM_POLICY_MAX + XFRM_POLICY_IN];
+	si->outscnt = xfrm_policy_count[XFRM_POLICY_MAX + XFRM_POLICY_OUT];
+	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_MAX + XFRM_POLICY_FWD];
+
+	read_unlock_bh(&xfrm_policy_lock);
+}
+EXPORT_SYMBOL(xfrm_spd_getinfo);
+static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *__unused)
{
int dir, total;
@@ -690,7 +704,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
- policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+ policy->curlft.add_time = get_seconds();
policy->curlft.use_time = 0;
if (!mod_timer(&policy->timer, jiffies + HZ))
xfrm_pol_hold(policy);
@@ -1049,7 +1063,7 @@ static inline int policy_to_flow_dir(int dir)
return FLOW_DIR_OUT;
case XFRM_POLICY_FWD:
return FLOW_DIR_FWD;
- };
+ }
}
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
@@ -1133,7 +1147,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
old_pol = sk->sk_policy[dir];
sk->sk_policy[dir] = pol;
if (pol) {
- pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+ pol->curlft.add_time = get_seconds();
pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
}
@@ -1330,6 +1344,40 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
return err;
}
+/*
+ * Copy @size bytes from @src into the buffer at *@target, allocating that
+ * buffer with kmalloc(GFP_ATOMIC) first when *@target is NULL.  A buffer
+ * that already exists is overwritten in place and is assumed to be at
+ * least @size bytes long.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static inline int
+xfrm_dst_alloc_copy(void **target, void *src, int size)
+{
+	if (!*target) {
+		*target = kmalloc(size, GFP_ATOMIC);
+		if (!*target)
+			return -ENOMEM;
+	}
+	memcpy(*target, src, size);
+	return 0;
+}
+
+/*
+ * With CONFIG_XFRM_SUB_POLICY, keep a copy of selector @sel in the
+ * ->partner field of the xfrm_dst behind @dst (the buffer is allocated
+ * on first use).  Without that option this does nothing.
+ *
+ * Returns 0 on success, -ENOMEM if the copy buffer cannot be allocated.
+ */
+static inline int
+xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
+				   sel, sizeof(*sel));
+#else
+	return 0;
+#endif
+}
+
+/*
+ * With CONFIG_XFRM_SUB_POLICY, keep a copy of flow @fl in the ->origin
+ * field of the xfrm_dst behind @dst (the buffer is allocated on first
+ * use).  Without that option this does nothing.
+ *
+ * Returns 0 on success, -ENOMEM if the copy buffer cannot be allocated.
+ */
+static inline int
+xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
+#else
+	return 0;
+#endif
+}
static int stale_bundle(struct dst_entry *dst);
@@ -1386,7 +1434,7 @@ restart:
return 0;
family = dst_orig->ops->family;
- policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+ policy->curlft.use_time = get_seconds();
pols[0] = policy;
npols ++;
xfrm_nr += pols[0]->xfrm_nr;
@@ -1518,6 +1566,18 @@ restart:
err = -EHOSTUNREACH;
goto error;
}
+
+ if (npols > 1)
+ err = xfrm_dst_update_parent(dst, &pols[1]->selector);
+ else
+ err = xfrm_dst_update_origin(dst, fl);
+ if (unlikely(err)) {
+ write_unlock_bh(&policy->lock);
+ if (dst)
+ dst_free(dst);
+ goto error;
+ }
+
dst->next = policy->bundles;
policy->bundles = dst;
dst_hold(dst);
@@ -1682,7 +1742,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 1;
}
- pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+ pol->curlft.use_time = get_seconds();
pols[0] = pol;
npols ++;
@@ -1694,7 +1754,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[1]) {
if (IS_ERR(pols[1]))
return 0;
- pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
+ pols[1]->curlft.use_time = get_seconds();
npols ++;
}
}
@@ -1933,6 +1993,15 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
(dst->dev && !netif_running(dst->dev)))
return 0;
+#ifdef CONFIG_XFRM_SUB_POLICY
+ if (fl) {
+ if (first->origin && !flow_cache_uli_match(first->origin, fl))
+ return 0;
+ if (first->partner &&
+ !xfrm_selector_match(first->partner, fl, family))
+ return 0;
+ }
+#endif
last = NULL;
@@ -2089,7 +2158,7 @@ void xfrm_audit_log(uid_t auid, u32 sid, int type, int result,
sizeof(struct in6_addr));
}
audit_log_format(audit_buf,
- " src=" NIP6_FMT "dst=" NIP6_FMT,
+ " src=" NIP6_FMT " dst=" NIP6_FMT,
NIP6(saddr6), NIP6(daddr6));
}
break;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a35f9e4ede26..9955ff4da0a2 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -233,7 +233,7 @@ static inline unsigned long make_jiffies(long secs)
static void xfrm_timer_handler(unsigned long data)
{
struct xfrm_state *x = (struct xfrm_state*)data;
- unsigned long now = (unsigned long)xtime.tv_sec;
+ unsigned long now = get_seconds();
long next = LONG_MAX;
int warn = 0;
int err = 0;
@@ -326,7 +326,7 @@ struct xfrm_state *xfrm_state_alloc(void)
init_timer(&x->rtimer);
x->rtimer.function = xfrm_replay_timer_handler;
x->rtimer.data = (unsigned long)x;
- x->curlft.add_time = (unsigned long)xtime.tv_sec;
+ x->curlft.add_time = get_seconds();
x->lft.soft_byte_limit = XFRM_INF;
x->lft.soft_packet_limit = XFRM_INF;
x->lft.hard_byte_limit = XFRM_INF;
@@ -421,6 +421,16 @@ restart:
}
EXPORT_SYMBOL(xfrm_state_flush);
+/*
+ * Fill @si with the current state database figures: the number of states
+ * plus the hash mask and hash maximum.  All three are read while holding
+ * xfrm_state_lock with bottom halves disabled.
+ */
+void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
+{
+	spin_lock_bh(&xfrm_state_lock);
+
+	/* Hash table figures. */
+	si->sadhcnt = xfrm_state_hmask;
+	si->sadhmcnt = xfrm_state_hashmax;
+
+	/* Number of states. */
+	si->sadcnt = xfrm_state_num;
+
+	spin_unlock_bh(&xfrm_state_lock);
+}
+EXPORT_SYMBOL(xfrm_sad_getinfo);
+
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
struct xfrm_tmpl *tmpl,
@@ -458,7 +468,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
x->id.daddr.a6))
continue;
break;
- };
+ }
xfrm_state_hold(x);
return x;
@@ -493,7 +503,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
x->props.saddr.a6))
continue;
break;
- };
+ }
xfrm_state_hold(x);
return x;
@@ -704,7 +714,8 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
x->props.mode != mode ||
x->props.family != family ||
x->km.state != XFRM_STATE_ACQ ||
- x->id.spi != 0)
+ x->id.spi != 0 ||
+ x->id.proto != proto)
continue;
switch (family) {
@@ -721,7 +732,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
(struct in6_addr *)saddr))
continue;
break;
- };
+ }
xfrm_state_hold(x);
return x;
@@ -754,7 +765,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
(struct in6_addr *)daddr);
break;
- };
+ }
x->km.state = XFRM_STATE_ACQ;
x->id.proto = proto;
@@ -801,7 +812,8 @@ int xfrm_state_add(struct xfrm_state *x)
if (use_spi && x->km.seq) {
x1 = __xfrm_find_acq_byseq(x->km.seq);
- if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
+ if (x1 && ((x1->id.proto != x->id.proto) ||
+ xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
xfrm_state_put(x1);
x1 = NULL;
}
@@ -1049,7 +1061,7 @@ EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
if (!x->curlft.use_time)
- x->curlft.use_time = (unsigned long)xtime.tv_sec;
+ x->curlft.use_time = get_seconds();
if (x->km.state != XFRM_STATE_VALID)
return -EINVAL;
@@ -1369,7 +1381,8 @@ int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
return 0;
diff = x->replay.seq - seq;
- if (diff >= x->props.replay_window) {
+ if (diff >= min_t(unsigned int, x->props.replay_window,
+ sizeof(x->replay.bitmap) * 8)) {
x->stats.replay_window++;
return -EINVAL;
}
@@ -1664,37 +1677,17 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-/*
- * This function is NOT optimal. For example, with ESP it will give an
- * MTU that's usually two bytes short of being optimal. However, it will
- * usually give an answer that's a multiple of 4 provided the input is
- * also a multiple of 4.
- */
+/*
+ * Return the MTU to use for state @x given @mtu.  When the state is
+ * XFRM_STATE_VALID and its type provides a get_mtu hook, the value comes
+ * from that hook; otherwise @mtu is returned unchanged.  The check and
+ * the hook call are both made under x->lock.
+ */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
- int res = mtu;
-
- res -= x->props.header_len;
-
- for (;;) {
- int m = res;
-
- if (m < 68)
- return 68;
-
- spin_lock_bh(&x->lock);
- if (x->km.state == XFRM_STATE_VALID &&
- x->type && x->type->get_max_size)
- m = x->type->get_max_size(x, m);
- else
- m += x->props.header_len;
- spin_unlock_bh(&x->lock);
-
- if (m <= mtu)
- break;
- res -= (m - mtu);
- }
+ int res;
+ spin_lock_bh(&x->lock);
+ if (x->km.state == XFRM_STATE_VALID &&
+ x->type && x->type->get_mtu)
+ res = x->type->get_mtu(x, mtu);
+ else
+ res = mtu;
+ spin_unlock_bh(&x->lock);
return res;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 96789952f6a3..b14c7e590c31 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -71,7 +71,7 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
default:
return -EINVAL;
- };
+ }
algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
return 0;
@@ -152,7 +152,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
default:
goto out;
- };
+ }
err = -EINVAL;
switch (p->id.proto) {
@@ -192,7 +192,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
default:
goto out;
- };
+ }
if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
goto out;
@@ -217,7 +217,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
default:
goto out;
- };
+ }
err = 0;
@@ -272,9 +272,8 @@ static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a
}
-static inline int xfrm_user_sec_ctx_size(struct xfrm_policy *xp)
+static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
{
- struct xfrm_sec_ctx *xfrm_ctx = xp->security;
int len = 0;
if (xfrm_ctx) {
@@ -577,7 +576,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
struct sk_buff *skb = sp->out_skb;
struct xfrm_usersa_info *p;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
if (sp->this_idx < sp->start_idx)
goto out;
@@ -622,14 +621,14 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
if (x->lastused)
RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
out:
sp->this_idx++;
return 0;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -673,6 +672,113 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
return skb;
}
+/*
+ * Build an XFRM_MSG_NEWSPDINFO message in @skb: a u32 payload carrying
+ * @flags, followed by XFRMA_SPD_INFO and XFRMA_SPD_HINFO attributes
+ * filled from xfrm_spd_getinfo().
+ *
+ * Returns the nlmsg_end() result on success, or -EMSGSIZE when the
+ * header or an attribute does not fit; a partly built message is
+ * cancelled before returning.
+ */
+static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+{
+	struct xfrmk_spdinfo info;
+	struct xfrmu_spdinfo counts;
+	struct xfrmu_spdhinfo hash;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
+	if (!nlh)	/* not expected to happen */
+		return -EMSGSIZE;
+
+	/* The fixed payload is just the flags word. */
+	*(u32 *)nlmsg_data(nlh) = flags;
+
+	xfrm_spd_getinfo(&info);
+
+	hash.spdhcnt = info.spdhcnt;
+	hash.spdhmcnt = info.spdhmcnt;
+
+	counts.incnt = info.incnt;
+	counts.outcnt = info.outcnt;
+	counts.fwdcnt = info.fwdcnt;
+	counts.inscnt = info.inscnt;
+	counts.outscnt = info.outscnt;
+	counts.fwdscnt = info.fwdscnt;
+
+	/* NLA_PUT branches to nla_put_failure when the attribute won't fit. */
+	NLA_PUT(skb, XFRMA_SPD_INFO, sizeof(counts), &counts);
+	NLA_PUT(skb, XFRMA_SPD_HINFO, sizeof(hash), &hash);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+/*
+ * Handle a policy database info request: allocate a reply skb sized for
+ * the header, the u32 flags payload and the two SPD attributes, fill it
+ * with build_spdinfo() and unicast it back to the requesting pid.
+ *
+ * Returns -ENOMEM if the reply cannot be allocated, otherwise the
+ * nlmsg_unicast() result.  A build failure is treated as a bug because
+ * the skb was sized for exactly this message.
+ */
+static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+			    struct rtattr **xfrma)
+{
+	u32 seq = nlh->nlmsg_seq;
+	u32 spid = NETLINK_CB(skb).pid;
+	u32 *flags = NLMSG_DATA(nlh);
+	struct sk_buff *reply;
+	int len;
+
+	len = NLMSG_LENGTH(sizeof(u32)) +
+	      RTA_SPACE(sizeof(struct xfrmu_spdinfo)) +
+	      RTA_SPACE(sizeof(struct xfrmu_spdhinfo));
+
+	reply = alloc_skb(len, GFP_ATOMIC);
+	if (!reply)
+		return -ENOMEM;
+
+	if (build_spdinfo(reply, spid, seq, *flags) < 0)
+		BUG();
+
+	return nlmsg_unicast(xfrm_nl, reply, spid);
+}
+
+/*
+ * Build an XFRM_MSG_NEWSADINFO message in @skb: a u32 payload carrying
+ * @flags, followed by an XFRMA_SAD_CNT u32 attribute and an
+ * XFRMA_SAD_HINFO attribute, both filled from xfrm_sad_getinfo().
+ *
+ * Returns the nlmsg_end() result on success, or -EMSGSIZE when the
+ * header or an attribute does not fit; a partly built message is
+ * cancelled before returning.
+ */
+static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+{
+	struct xfrmk_sadinfo info;
+	struct xfrmu_sadhinfo hash;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
+	if (!nlh)	/* not expected to happen */
+		return -EMSGSIZE;
+
+	/* The fixed payload is just the flags word. */
+	*(u32 *)nlmsg_data(nlh) = flags;
+
+	xfrm_sad_getinfo(&info);
+
+	hash.sadhcnt = info.sadhcnt;
+	hash.sadhmcnt = info.sadhmcnt;
+
+	/* Both NLA_PUT* macros branch to nla_put_failure on overflow. */
+	NLA_PUT_U32(skb, XFRMA_SAD_CNT, info.sadcnt);
+	NLA_PUT(skb, XFRMA_SAD_HINFO, sizeof(hash), &hash);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+/*
+ * Handle a state database info request: allocate a reply skb sized for
+ * the header, the u32 flags payload, the hash info attribute and the u32
+ * count attribute, fill it with build_sadinfo() and unicast it back to
+ * the requesting pid.
+ *
+ * Returns -ENOMEM if the reply cannot be allocated, otherwise the
+ * nlmsg_unicast() result.  A build failure is treated as a bug because
+ * the skb was sized for exactly this message.
+ */
+static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+			    struct rtattr **xfrma)
+{
+	u32 seq = nlh->nlmsg_seq;
+	u32 spid = NETLINK_CB(skb).pid;
+	u32 *flags = NLMSG_DATA(nlh);
+	struct sk_buff *reply;
+	int len;
+
+	len = NLMSG_LENGTH(sizeof(u32)) +
+	      RTA_SPACE(sizeof(struct xfrmu_sadhinfo)) +
+	      RTA_SPACE(sizeof(u32));
+
+	reply = alloc_skb(len, GFP_ATOMIC);
+	if (!reply)
+		return -ENOMEM;
+
+	if (build_sadinfo(reply, spid, seq, *flags) < 0)
+		BUG();
+
+	return nlmsg_unicast(xfrm_nl, reply, spid);
+}
+
static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct rtattr **xfrma)
{
@@ -712,7 +818,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p)
default:
return -EINVAL;
- };
+ }
if (p->min > p->max)
return -EINVAL;
@@ -790,7 +896,7 @@ static int verify_policy_dir(u8 dir)
default:
return -EINVAL;
- };
+ }
return 0;
}
@@ -806,7 +912,7 @@ static int verify_policy_type(u8 type)
default:
return -EINVAL;
- };
+ }
return 0;
}
@@ -822,7 +928,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
default:
return -EINVAL;
- };
+ }
switch (p->action) {
case XFRM_POLICY_ALLOW:
@@ -831,7 +937,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
default:
return -EINVAL;
- };
+ }
switch (p->sel.family) {
case AF_INET:
@@ -846,7 +952,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
default:
return -EINVAL;
- };
+ }
return verify_policy_dir(p->dir);
}
@@ -913,7 +1019,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
#endif
default:
return -EINVAL;
- };
+ }
}
return 0;
@@ -1158,7 +1264,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
struct sk_buff *in_skb = sp->in_skb;
struct sk_buff *skb = sp->out_skb;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
if (sp->this_idx < sp->start_idx)
goto out;
@@ -1177,13 +1283,13 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
out:
sp->this_idx++;
return 0;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1331,7 +1437,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
struct xfrm_aevent_id *id;
struct nlmsghdr *nlh;
struct xfrm_lifetime_cur ltime;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id));
id = NLMSG_DATA(nlh);
@@ -1363,12 +1469,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer);
}
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1745,7 +1851,7 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
struct xfrm_migrate *mp;
struct xfrm_userpolicy_id *pol_id;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
int i;
nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id));
@@ -1765,10 +1871,10 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
goto nlmsg_failure;
}
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1824,6 +1930,8 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
[XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
[XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
+ [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
+ [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
};
#undef XMSGSIZE
@@ -1851,55 +1959,40 @@ static struct xfrm_link {
[XFRM_MSG_NEWAE - XFRM_MSG_BASE] = { .doit = xfrm_new_ae },
[XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae },
[XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate },
+ [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo },
+ [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo },
};
-static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct rtattr *xfrma[XFRMA_MAX];
struct xfrm_link *link;
int type, min_len;
- if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
- return 0;
-
type = nlh->nlmsg_type;
-
- /* A control message: ignore them */
- if (type < XFRM_MSG_BASE)
- return 0;
-
- /* Unknown message: reply with EINVAL */
if (type > XFRM_MSG_MAX)
- goto err_einval;
+ return -EINVAL;
type -= XFRM_MSG_BASE;
link = &xfrm_dispatch[type];
/* All operations require privileges, even GET */
- if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
- *errp = -EPERM;
- return -1;
- }
+ if (security_netlink_recv(skb, CAP_NET_ADMIN))
+ return -EPERM;
if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
(nlh->nlmsg_flags & NLM_F_DUMP)) {
if (link->dump == NULL)
- goto err_einval;
-
- if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
- link->dump, NULL)) != 0) {
- return -1;
- }
+ return -EINVAL;
- netlink_queue_skip(nlh, skb);
- return -1;
+ return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL);
}
memset(xfrma, 0, sizeof(xfrma));
if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
- goto err_einval;
+ return -EINVAL;
if (nlh->nlmsg_len > min_len) {
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -1909,7 +2002,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
unsigned short flavor = attr->rta_type;
if (flavor) {
if (flavor > XFRMA_MAX)
- goto err_einval;
+ return -EINVAL;
xfrma[flavor - 1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
@@ -1917,14 +2010,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
}
if (link->doit == NULL)
- goto err_einval;
- *errp = link->doit(skb, nlh, xfrma);
-
- return *errp;
+ return -EINVAL;
-err_einval:
- *errp = -EINVAL;
- return -1;
+ return link->doit(skb, nlh, xfrma);
}
static void xfrm_netlink_rcv(struct sock *sk, int len)
@@ -1943,7 +2031,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
{
struct xfrm_user_expire *ue;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE,
sizeof(*ue));
@@ -1953,11 +2041,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
copy_to_user_state(x, &ue->state);
ue->hard = (c->data.hard != 0) ? 1 : 0;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -2000,7 +2088,7 @@ static int xfrm_notify_sa_flush(struct km_event *c)
struct xfrm_usersa_flush *p;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- unsigned char *b;
+ sk_buff_data_t b;
int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
skb = alloc_skb(len, GFP_ATOMIC);
@@ -2025,7 +2113,7 @@ nlmsg_failure:
return -1;
}
-static int inline xfrm_sa_len(struct xfrm_state *x)
+static inline int xfrm_sa_len(struct xfrm_state *x)
{
int l = 0;
if (x->aalg)
@@ -2046,7 +2134,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
struct xfrm_usersa_id *id;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- unsigned char *b;
+ sk_buff_data_t b;
int len = xfrm_sa_len(x);
int headlen;
@@ -2130,7 +2218,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
{
struct xfrm_user_acquire *ua;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
__u32 seq = xfrm_get_acqseq();
nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
@@ -2154,11 +2242,11 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -2170,7 +2258,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
- len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
+ len += RTA_SPACE(xfrm_user_sec_ctx_size(x->security));
#ifdef CONFIG_XFRM_SUB_POLICY
len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
#endif
@@ -2250,7 +2338,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
struct xfrm_user_polexpire *upe;
struct nlmsghdr *nlh;
int hard = c->data.hard;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe));
upe = NLMSG_DATA(nlh);
@@ -2265,11 +2353,11 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
goto nlmsg_failure;
upe->hard = !!hard;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -2280,7 +2368,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire));
- len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
+ len += RTA_SPACE(xfrm_user_sec_ctx_size(xp->security));
#ifdef CONFIG_XFRM_SUB_POLICY
len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
#endif
@@ -2301,7 +2389,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
struct xfrm_userpolicy_id *id;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- unsigned char *b;
+ sk_buff_data_t b;
int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
int headlen;
@@ -2358,7 +2446,7 @@ static int xfrm_notify_policy_flush(struct km_event *c)
{
struct nlmsghdr *nlh;
struct sk_buff *skb;
- unsigned char *b;
+ sk_buff_data_t b;
int len = 0;
#ifdef CONFIG_XFRM_SUB_POLICY
len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
@@ -2411,7 +2499,7 @@ static int build_report(struct sk_buff *skb, u8 proto,
{
struct xfrm_user_report *ur;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur));
ur = NLMSG_DATA(nlh);
@@ -2423,12 +2511,12 @@ static int build_report(struct sk_buff *skb, u8 proto,
if (addr)
RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -2467,7 +2555,7 @@ static int __init xfrm_user_init(void)
printk(KERN_INFO "Initializing XFRM netlink socket\n");
nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
- xfrm_netlink_rcv, THIS_MODULE);
+ xfrm_netlink_rcv, NULL, THIS_MODULE);
if (nlsk == NULL)
return -ENOMEM;
rcu_assign_pointer(xfrm_nl, nlsk);