Diffstat (limited to 'net')
-rw-r--r--  net/9p/trans_virtio.c | 2
-rw-r--r--  net/Kconfig | 20
-rw-r--r--  net/Makefile | 2
-rw-r--r--  net/activity_stats.c | 119
-rw-r--r--  net/bluetooth/af_bluetooth.c | 34
-rw-r--r--  net/bluetooth/amp.c | 2
-rw-r--r--  net/bluetooth/hci_conn.c | 61
-rw-r--r--  net/bluetooth/hci_core.c | 24
-rwxr-xr-x [-rw-r--r--]  net/bluetooth/hci_event.c | 20
-rw-r--r--  net/bluetooth/l2cap_core.c | 4
-rw-r--r--  net/bluetooth/l2cap_sock.c | 3
-rw-r--r--  net/bluetooth/mgmt.c | 4
-rw-r--r--  net/bluetooth/rfcomm/core.c | 1
-rw-r--r--  net/bluetooth/sco.c | 53
-rw-r--r--  net/bridge/br_device.c | 11
-rw-r--r--  net/core/sock.c | 140
-rw-r--r--  net/ipv4/Makefile | 1
-rw-r--r--  net/ipv4/af_inet.c | 20
-rw-r--r--  net/ipv4/devinet.c | 8
-rw-r--r--  net/ipv4/icmp.c | 5
-rw-r--r--  net/ipv4/ip_sockglue.c | 3
-rw-r--r--  net/ipv4/netfilter/Kconfig | 20
-rw-r--r--  net/ipv4/netfilter/ipt_REJECT.c | 8
-rw-r--r--  net/ipv4/ping.c | 583
-rw-r--r--  net/ipv4/raw.c | 2
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 24
-rw-r--r--  net/ipv4/sysfs_net_ipv4.c | 88
-rw-r--r--  net/ipv4/tcp.c | 117
-rw-r--r--  net/ipv4/tcp_input.c | 5
-rw-r--r--  net/ipv4/tcp_ipv4.c | 19
-rw-r--r--  net/ipv4/tcp_output.c | 7
-rw-r--r--  net/ipv4/udp.c | 17
-rw-r--r--  net/ipv6/Makefile | 2
-rw-r--r--  net/ipv6/af_inet6.c | 49
-rw-r--r--  net/ipv6/exthdrs_core.c | 13
-rw-r--r--  net/ipv6/icmp.c | 19
-rw-r--r--  net/ipv6/netfilter/Kconfig | 12
-rw-r--r--  net/ipv6/netfilter/ip6t_REJECT.c | 9
-rw-r--r--  net/ipv6/ping.c | 221
-rw-r--r--  net/ipv6/tcp_ipv6.c | 13
-rw-r--r--  net/ipv6/udp.c | 13
-rw-r--r--  net/mac80211/Kconfig | 9
-rw-r--r--  net/mac80211/ieee80211_i.h | 3
-rw-r--r--  net/mac80211/scan.c | 14
-rw-r--r--  net/mhi/Kconfig | 91
-rw-r--r--  net/mhi/Makefile | 10
-rw-r--r--  net/mhi/l2mux.c | 280
-rw-r--r--  net/mhi/l3mhdp.c | 993
-rw-r--r--  net/mhi/l3mhi.c | 132
-rw-r--r--  net/mhi/l3phonet.c | 118
-rw-r--r--  net/mhi/mhi_dgram.c | 330
-rw-r--r--  net/mhi/mhi_proto.c | 214
-rw-r--r--  net/mhi/mhi_raw.c | 326
-rw-r--r--  net/mhi/mhi_socket.c | 310
-rw-r--r--  net/netfilter/Kconfig | 42
-rw-r--r--  net/netfilter/Makefile | 2
-rw-r--r--  net/netfilter/xt_IDLETIMER.c | 220
-rw-r--r--  net/netfilter/xt_qtaguid.c | 3012
-rw-r--r--  net/netfilter/xt_qtaguid_internal.h | 352
-rw-r--r--  net/netfilter/xt_qtaguid_print.c | 566
-rw-r--r--  net/netfilter/xt_qtaguid_print.h | 120
-rw-r--r--  net/netfilter/xt_quota2.c | 385
-rw-r--r--  net/netfilter/xt_socket.c | 70
-rw-r--r--  net/netlink/af_netlink.c | 2
-rw-r--r--  net/phonet/Kconfig | 9
-rw-r--r--  net/phonet/Makefile | 3
-rw-r--r--  net/phonet/af_phonet.c | 100
-rw-r--r--  net/phonet/ld_phonet.c | 644
-rw-r--r--  net/phonet/pn_dev.c | 2
-rw-r--r--  net/phonet/socket.c | 114
-rw-r--r--  net/rfkill/Kconfig | 5
-rw-r--r--  net/rfkill/core.c | 4
-rw-r--r--  net/rfkill/rfkill-gpio.c | 6
-rw-r--r--  net/socket.c | 5
-rw-r--r--  net/sunrpc/sched.c | 2
-rw-r--r--  net/unix/af_unix.c | 3
-rw-r--r--  net/wireless/Kconfig | 11
-rw-r--r--  net/wireless/core.c | 4
-rw-r--r--  net/wireless/nl80211.c | 9
-rw-r--r--  net/wireless/scan.c | 2
-rw-r--r--  net/wireless/sme.c | 6
-rw-r--r--  net/wireless/util.c | 7
82 files changed, 9999 insertions, 316 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index c76a4388a5d7..b007ac1e4f2d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -693,7 +693,7 @@ static struct p9_trans_module p9_virtio_trans = {
.create = p9_virtio_create,
.close = p9_virtio_close,
.request = p9_virtio_request,
- .zc_request = p9_virtio_zc_request,
+ //.zc_request = p9_virtio_zc_request,
.cancel = p9_virtio_cancel,
/*
* We leave one entry for input and one entry for response
diff --git a/net/Kconfig b/net/Kconfig
index 2ddc9046868e..c9efb8b92c3a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -10,7 +10,7 @@ menuconfig NET
The reason is that some programs need kernel networking support even
when running on a stand-alone machine that isn't connected to any
other computer.
-
+
If you are upgrading from an older kernel, you
should consider updating your networking tools too because changes
in the kernel and the tools often go hand in hand. The tools are
@@ -81,6 +81,19 @@ source "net/netlabel/Kconfig"
endif # if INET
+config ANDROID_PARANOID_NETWORK
+ bool "Only allow certain groups to create sockets"
+ help
+ none
+
+config NET_ACTIVITY_STATS
+ bool "Network activity statistics tracking"
+ default y
+ help
+ Network activity statistics are useful for tracking wireless
+ modem activity on 2G, 3G, 4G wireless networks. Counts number of
+ transmissions and groups them in specified time buckets.
+
config NETWORK_SECMARK
bool "Security Marking"
help
@@ -209,6 +222,7 @@ source "drivers/net/appletalk/Kconfig"
source "net/x25/Kconfig"
source "net/lapb/Kconfig"
source "net/phonet/Kconfig"
+source "net/mhi/Kconfig"
source "net/ieee802154/Kconfig"
source "net/mac802154/Kconfig"
source "net/sched/Kconfig"
@@ -220,7 +234,7 @@ source "net/vmw_vsock/Kconfig"
source "net/netlink/Kconfig"
config RPS
- boolean
+ boolean "RPS"
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y
@@ -287,7 +301,7 @@ config NET_TCPPROBE
Documentation on how to use TCP connection probing can be found
at:
-
+
http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe
To compile this code as a module, choose M here: the
diff --git a/net/Makefile b/net/Makefile
index 091e7b04f301..6afdd2740f6e 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -67,6 +67,8 @@ obj-$(CONFIG_WIMAX) += wimax/
obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/
obj-$(CONFIG_CEPH_LIB) += ceph/
obj-$(CONFIG_BATMAN_ADV) += batman-adv/
+obj-$(CONFIG_MHI) += mhi/
obj-$(CONFIG_NFC) += nfc/
obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
+obj-$(CONFIG_NET_ACTIVITY_STATS) += activity_stats.o
diff --git a/net/activity_stats.c b/net/activity_stats.c
new file mode 100644
index 000000000000..4609ce2043eb
--- /dev/null
+++ b/net/activity_stats.c
@@ -0,0 +1,119 @@
+/* net/activity_stats.c
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/suspend.h>
+#include <net/net_namespace.h>
+
+/*
+ * Track transmission rates in buckets (power of 2).
+ * 1,2,4,8...512 seconds.
+ *
+ * Buckets represent the count of network transmissions at least
+ * N seconds apart, where N is 1 << bucket index.
+ */
+#define BUCKET_MAX 10
+
+/* Track network activity frequency */
+static unsigned long activity_stats[BUCKET_MAX];
+static ktime_t last_transmit;
+static ktime_t suspend_time;
+static DEFINE_SPINLOCK(activity_lock);
+
+void activity_stats_update(void)
+{
+ int i;
+ unsigned long flags;
+ ktime_t now;
+ s64 delta;
+
+ spin_lock_irqsave(&activity_lock, flags);
+ now = ktime_get();
+ delta = ktime_to_ns(ktime_sub(now, last_transmit));
+
+ for (i = BUCKET_MAX - 1; i >= 0; i--) {
+ /*
+ * Check if the time delta between network activity is within the
+ * minimum bucket range.
+ */
+ if (delta < (1000000000ULL << i))
+ continue;
+
+ activity_stats[i]++;
+ last_transmit = now;
+ break;
+ }
+ spin_unlock_irqrestore(&activity_lock, flags);
+}
+
+static int activity_stats_show(struct seq_file *m, void *v)
+{
+ int i;
+ int ret;
+
+ seq_printf(m, "Min Bucket(sec) Count\n");
+
+ for (i = 0; i < BUCKET_MAX; i++) {
+ ret = seq_printf(m, "%15d %lu\n", 1 << i, activity_stats[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int activity_stats_notifier(struct notifier_block *nb,
+ unsigned long event, void *dummy)
+{
+ switch (event) {
+ case PM_SUSPEND_PREPARE:
+ suspend_time = ktime_get_real();
+ break;
+
+ case PM_POST_SUSPEND:
+ suspend_time = ktime_sub(ktime_get_real(), suspend_time);
+ last_transmit = ktime_sub(last_transmit, suspend_time);
+ }
+
+ return 0;
+}
+
+static int activity_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, activity_stats_show, PDE_DATA(inode));
+}
+
+static const struct file_operations activity_stats_fops = {
+ .open = activity_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static struct notifier_block activity_stats_notifier_block = {
+ .notifier_call = activity_stats_notifier,
+};
+
+static int __init activity_stats_init(void)
+{
+ proc_create("activity", S_IRUGO,
+ init_net.proc_net_stat, &activity_stats_fops);
+ return register_pm_notifier(&activity_stats_notifier_block);
+}
+
+subsys_initcall(activity_stats_init);
+
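The update loop above walks the buckets from largest to smallest and credits the first one whose minimum interval fits inside the observed gap, so a transmission effectively lands in bucket floor(log2(delta in seconds)), and gaps under one second leave both the counters and last_transmit untouched. A stand-alone sketch of the same selection rule (bucket_for() is an illustrative helper, not part of the patch):

    /* Sketch: which bucket a given inter-transmission gap lands in,
     * mirroring the loop in activity_stats_update(). */
    #include <stdio.h>

    #define BUCKET_MAX 10

    static int bucket_for(long long delta_ns)
    {
            int i;

            /* First bucket whose minimum span (2^i seconds) fits. */
            for (i = BUCKET_MAX - 1; i >= 0; i--)
                    if (delta_ns >= (1000000000LL << i))
                            return i;
            return -1; /* gap under 1s: nothing is credited */
    }

    int main(void)
    {
            /* A 70s gap lands in bucket 6 (min span 64s); 0.5s in none. */
            printf("%d %d\n", bucket_for(70000000000LL),
                   bucket_for(500000000LL));
            return 0;
    }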
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 6629cdc134dc..f7c36826f3f4 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -30,6 +30,11 @@
#include <net/bluetooth/bluetooth.h>
#include <linux/proc_fs.h>
+#ifndef CONFIG_BT_SOCK_DEBUG
+#undef BT_DBG
+#define BT_DBG(D...)
+#endif
+
#define VERSION "2.16"
/* Bluetooth sockets */
@@ -103,11 +108,40 @@ void bt_sock_unregister(int proto)
}
EXPORT_SYMBOL(bt_sock_unregister);
+#ifdef CONFIG_PARANOID_NETWORK
+static inline int current_has_bt_admin(void)
+{
+ return !current_euid();
+}
+
+static inline int current_has_bt(void)
+{
+ return current_has_bt_admin();
+}
+# else
+static inline int current_has_bt_admin(void)
+{
+ return 1;
+}
+
+static inline int current_has_bt(void)
+{
+ return 1;
+}
+#endif
+
static int bt_sock_create(struct net *net, struct socket *sock, int proto,
int kern)
{
int err;
+ if (proto == BTPROTO_RFCOMM || proto == BTPROTO_SCO ||
+ proto == BTPROTO_L2CAP) {
+ if (!current_has_bt())
+ return -EPERM;
+ } else if (!current_has_bt_admin())
+ return -EPERM;
+
if (net != &init_net)
return -EAFNOSUPPORT;
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index d459ed43c779..a3f3380c2095 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -113,7 +113,7 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
bdaddr_t *dst = mgr->l2cap_conn->dst;
struct hci_conn *hcon;
- hcon = hci_conn_add(hdev, AMP_LINK, dst);
+ hcon = hci_conn_add(hdev, AMP_LINK, 0, dst);
if (!hcon)
return NULL;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6c7f36379722..1c24dcf17dd2 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -354,7 +354,8 @@ static void hci_conn_auto_accept(unsigned long arg)
&conn->dst);
}
-struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
+struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type,
+ __u16 pkt_type, bdaddr_t *dst)
{
struct hci_conn *conn;
@@ -382,14 +383,22 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;
break;
case SCO_LINK:
- if (lmp_esco_capable(hdev))
- conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
- (hdev->esco_type & EDR_ESCO_MASK);
- else
- conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK;
- break;
+ if (!pkt_type)
+ pkt_type = SCO_ESCO_MASK;
case ESCO_LINK:
- conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK;
+ if (!pkt_type)
+ pkt_type = ALL_ESCO_MASK;
+ if (lmp_esco_capable(hdev)) {
+ /* HCI Setup Synchronous Connection Command uses
+ reverse logic on the EDR_ESCO_MASK bits */
+ conn->pkt_type = (pkt_type ^ EDR_ESCO_MASK) &
+ hdev->esco_type;
+ } else {
+ /* Legacy HCI Add Sco Connection Command uses a
+ shifted bitmask */
+ conn->pkt_type = (pkt_type << 5) & hdev->pkt_type &
+ SCO_PTYPE_MASK;
+ }
break;
}
@@ -520,7 +529,7 @@ static struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
if (le)
return ERR_PTR(-EBUSY);
- le = hci_conn_add(hdev, LE_LINK, dst);
+ le = hci_conn_add(hdev, LE_LINK, 0, dst);
if (!le)
return ERR_PTR(-ENOMEM);
@@ -543,7 +552,7 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
- acl = hci_conn_add(hdev, ACL_LINK, dst);
+ acl = hci_conn_add(hdev, ACL_LINK, 0, dst);
if (!acl)
return ERR_PTR(-ENOMEM);
}
@@ -561,7 +570,8 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
}
static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
- bdaddr_t *dst, u8 sec_level, u8 auth_type)
+ __u16 pkt_type, bdaddr_t *dst,
+ u8 sec_level, u8 auth_type)
{
struct hci_conn *acl;
struct hci_conn *sco;
@@ -572,7 +582,7 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
sco = hci_conn_hash_lookup_ba(hdev, type, dst);
if (!sco) {
- sco = hci_conn_add(hdev, type, dst);
+ sco = hci_conn_add(hdev, type, pkt_type, dst);
if (!sco) {
hci_conn_drop(acl);
return ERR_PTR(-ENOMEM);
@@ -602,7 +612,8 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
}
/* Create SCO, ACL or LE connection. */
-struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
+struct hci_conn *hci_connect(struct hci_dev *hdev, int type,
+ __u16 pkt_type, bdaddr_t *dst,
__u8 dst_type, __u8 sec_level, __u8 auth_type)
{
BT_DBG("%s dst %pMR type 0x%x", hdev->name, dst, type);
@@ -614,7 +625,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
return hci_connect_acl(hdev, dst, sec_level, auth_type);
case SCO_LINK:
case ESCO_LINK:
- return hci_connect_sco(hdev, type, dst, sec_level, auth_type);
+ return hci_connect_sco(hdev, type, pkt_type, dst, sec_level, auth_type);
}
return ERR_PTR(-EINVAL);
@@ -880,6 +891,17 @@ int hci_get_conn_list(void __user *arg)
(ci + n)->out = c->out;
(ci + n)->state = c->state;
(ci + n)->link_mode = c->link_mode;
+#ifdef CONFIG_ANDROID
+ if (c->type == SCO_LINK) {
+ (ci + n)->mtu = hdev->sco_mtu;
+ (ci + n)->cnt = hdev->sco_cnt;
+ (ci + n)->pkts = hdev->sco_pkts;
+ } else {
+ (ci + n)->mtu = hdev->acl_mtu;
+ (ci + n)->cnt = hdev->acl_cnt;
+ (ci + n)->pkts = hdev->acl_pkts;
+ }
+#endif
if (++n >= req.conn_num)
break;
}
@@ -916,6 +938,17 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg)
ci.out = conn->out;
ci.state = conn->state;
ci.link_mode = conn->link_mode;
+#ifdef CONFIG_ANDROID
+ if (req.type == SCO_LINK) {
+ ci.mtu = hdev->sco_mtu;
+ ci.cnt = hdev->sco_cnt;
+ ci.pkts = hdev->sco_pkts;
+ } else {
+ ci.mtu = hdev->acl_mtu;
+ ci.cnt = hdev->acl_cnt;
+ ci.pkts = hdev->acl_pkts;
+ }
+#endif
}
hci_dev_unlock(hdev);
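The SCO_LINK/ESCO_LINK branch above encodes the one pkt_type argument two different ways: controllers with eSCO support get the mask with the EDR bits flipped, because in the Setup Synchronous Connection command those bits mean "may not be used", while legacy controllers get the eSCO HVx bits shifted up into the Add SCO Connection positions. A compilable sketch of both mappings (the constant values are copied from my reading of <net/bluetooth/hci.h> and should be treated as illustrative):

    /* Sketch of the two pkt_type mappings in hci_conn_add(). */
    #include <stdio.h>

    #define ESCO_HV1        0x0001
    #define ESCO_HV2        0x0002
    #define ESCO_HV3        0x0004
    #define HCI_HV1         0x0020
    #define HCI_HV2         0x0040
    #define HCI_HV3         0x0080
    #define SCO_PTYPE_MASK  (HCI_HV1 | HCI_HV2 | HCI_HV3)
    #define EDR_ESCO_MASK   0x03C0  /* 2-EV3/3-EV3/2-EV5/3-EV5 bits */

    int main(void)
    {
            unsigned short pkt_type = ESCO_HV3;     /* caller asks for HV3 */
            unsigned short esco_type = 0xFFFF;      /* controller caps */
            unsigned short dev_pkt_type = 0xFFFF;

            /* eSCO path: EDR bits use reverse ("do not use") logic. */
            printf("esco: 0x%04x\n", (pkt_type ^ EDR_ESCO_MASK) & esco_type);
            /* Legacy path: eSCO HVx bits shift into the Add SCO positions. */
            printf("sco:  0x%04x\n",
                   (pkt_type << 5) & dev_pkt_type & SCO_PTYPE_MASK);
            return 0;
    }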
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 7c88f5f83598..a0903c6f736c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -45,13 +45,29 @@ DEFINE_RWLOCK(hci_dev_list_lock);
LIST_HEAD(hci_cb_list);
DEFINE_RWLOCK(hci_cb_list_lock);
+/* HCI notifiers list */
+static ATOMIC_NOTIFIER_HEAD(hci_notifier);
+
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
/* ---- HCI notifications ---- */
+int hci_register_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&hci_notifier, nb);
+}
+EXPORT_SYMBOL(hci_register_notifier);
+
+int hci_unregister_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&hci_notifier, nb);
+}
+EXPORT_SYMBOL(hci_unregister_notifier);
+
static void hci_notify(struct hci_dev *hdev, int event)
{
+ atomic_notifier_call_chain(&hci_notifier, event, hdev);
hci_sock_dev_event(hdev, event);
}
@@ -2593,6 +2609,9 @@ static int hci_send_frame(struct sk_buff *skb)
/* Get rid of skb owner, prior to sending to the driver. */
skb_orphan(skb);
+ /* Notify the registered devices about a new send */
+ hci_notify(hdev, HCI_DEV_WRITE);
+
return hdev->send(skb);
}
@@ -2669,6 +2688,11 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
+ if (!hdev->workqueue) {
+ WARN_ON("hci_send_cmd: workqueue not initialised");
+ return -ENOMEM;
+ }
+
skb = hci_prepare_cmd(hdev, opcode, plen, param);
if (!skb) {
BT_ERR("%s no memory for command", hdev->name);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cfca44f8d048..cb6e36925535 100644..100755
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1119,7 +1119,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
}
} else {
if (!conn) {
- conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr);
+ conn = hci_conn_add(hdev, ACL_LINK, 0, &cp->bdaddr);
if (conn) {
conn->out = true;
conn->link_mode |= HCI_LM_MASTER;
@@ -1748,6 +1748,15 @@ unlock:
hci_conn_check_pending(hdev);
}
+static inline bool is_sco_active(struct hci_dev *hdev)
+{
+ if (hci_conn_hash_lookup_state(hdev, SCO_LINK, BT_CONNECTED) ||
+ (hci_conn_hash_lookup_state(hdev, ESCO_LINK,
+ BT_CONNECTED)))
+ return true;
+ return false;
+}
+
static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_conn_request *ev = (void *) skb->data;
@@ -1775,7 +1784,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type,
&ev->bdaddr);
if (!conn) {
- conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr);
+ /* pkt_type not yet used for incoming connections */
+ conn = hci_conn_add(hdev, ev->link_type, 0, &ev->bdaddr);
if (!conn) {
BT_ERR("No memory for new connection");
hci_dev_unlock(hdev);
@@ -1794,7 +1804,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
bacpy(&cp.bdaddr, &ev->bdaddr);
- if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
+ if (lmp_rswitch_capable(hdev) && ((mask & HCI_LM_MASTER)
+ || is_sco_active(hdev)))
cp.role = 0x00; /* Become master */
else
cp.role = 0x01; /* Remain slave */
@@ -2963,6 +2974,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
hci_conn_add_sysfs(conn);
break;
+ case 0x10: /* Connection Accept Timeout */
case 0x11: /* Unsupported Feature or Parameter Value */
case 0x1c: /* SCO interval rejected */
case 0x1a: /* Unsupported Remote Feature */
@@ -3531,7 +3543,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
if (!conn) {
- conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
+ conn = hci_conn_add(hdev, LE_LINK, 0, &ev->bdaddr);
if (!conn) {
BT_ERR("No memory for new connection");
goto unlock;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 68843a28a7af..c11a28bae844 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1793,10 +1793,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
auth_type = l2cap_get_auth_type(chan);
if (chan->dcid == L2CAP_CID_LE_DATA)
- hcon = hci_connect(hdev, LE_LINK, dst, dst_type,
+ hcon = hci_connect(hdev, LE_LINK, 0, dst, dst_type,
chan->sec_level, auth_type);
else
- hcon = hci_connect(hdev, ACL_LINK, dst, dst_type,
+ hcon = hci_connect(hdev, ACL_LINK, 0, dst, dst_type,
chan->sec_level, auth_type);
if (IS_ERR(hcon)) {
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 36fed40c162c..81b2de6db5a8 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1006,7 +1006,8 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
- l2cap_sock_kill(sk);
+ if (!sock_flag(sk, SOCK_DEAD))
+ l2cap_sock_kill(sk);
}
static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f8ecbc70293d..3817728878da 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2205,10 +2205,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
auth_type = HCI_AT_DEDICATED_BONDING_MITM;
if (cp->addr.type == BDADDR_BREDR)
- conn = hci_connect(hdev, ACL_LINK, &cp->addr.bdaddr,
+ conn = hci_connect(hdev, ACL_LINK, 0, &cp->addr.bdaddr,
cp->addr.type, sec_level, auth_type);
else
- conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr,
+ conn = hci_connect(hdev, LE_LINK, 0, &cp->addr.bdaddr,
cp->addr.type, sec_level, auth_type);
if (IS_ERR(conn)) {
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index ca957d34b0c8..0c77476d33d2 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -436,7 +436,6 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
switch (d->state) {
case BT_CONNECT:
- case BT_CONFIG:
if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
set_bit(RFCOMM_AUTH_REJECT, &d->flags);
rfcomm_schedule();
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 2bb1d3a5e76b..3178c7b4a171 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -158,6 +158,7 @@ static int sco_connect(struct sock *sk)
{
bdaddr_t *src = &bt_sk(sk)->src;
bdaddr_t *dst = &bt_sk(sk)->dst;
+ __u16 pkt_type = sco_pi(sk)->pkt_type;
struct sco_conn *conn;
struct hci_conn *hcon;
struct hci_dev *hdev;
@@ -173,11 +174,13 @@ static int sco_connect(struct sock *sk)
if (lmp_esco_capable(hdev) && !disable_esco)
type = ESCO_LINK;
- else
+ else {
type = SCO_LINK;
+ pkt_type &= SCO_ESCO_MASK;
+ }
- hcon = hci_connect(hdev, type, dst, BDADDR_BREDR, BT_SECURITY_LOW,
- HCI_AT_NO_BONDING);
+ hcon = hci_connect(hdev, type, pkt_type, dst, BDADDR_BREDR,
+ BT_SECURITY_LOW, HCI_AT_NO_BONDING);
if (IS_ERR(hcon)) {
err = PTR_ERR(hcon);
goto done;
@@ -445,17 +448,21 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol,
return 0;
}
-static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
{
- struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
+ struct sockaddr_sco sa;
struct sock *sk = sock->sk;
- int err = 0;
+ int len, err = 0;
- BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
+ BT_DBG("sk %p %pMR", sk, &sa.sco_bdaddr);
if (!addr || addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ memset(&sa, 0, sizeof(sa));
+ len = min_t(unsigned int, sizeof(sa), alen);
+ memcpy(&sa, addr, len);
+
lock_sock(sk);
if (sk->sk_state != BT_OPEN) {
@@ -468,7 +475,8 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le
goto done;
}
- bacpy(&bt_sk(sk)->src, &sa->sco_bdaddr);
+ bacpy(&bt_sk(sk)->src, &sa.sco_bdaddr);
+ sco_pi(sk)->pkt_type = sa.sco_pkt_type;
sk->sk_state = BT_BOUND;
@@ -479,26 +487,34 @@ done:
static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
{
- struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
- int err;
+ struct sockaddr_sco sa;
+ int len, err;
BT_DBG("sk %p", sk);
- if (alen < sizeof(struct sockaddr_sco) ||
- addr->sa_family != AF_BLUETOOTH)
+ if (!addr || addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
- return -EBADFD;
-
- if (sk->sk_type != SOCK_SEQPACKET)
- return -EINVAL;
+ memset(&sa, 0, sizeof(sa));
+ len = min_t(unsigned int, sizeof(sa), alen);
+ memcpy(&sa, addr, len);
lock_sock(sk);
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+ err = -EBADFD;
+ goto done;
+ }
+
/* Set destination address and psm */
- bacpy(&bt_sk(sk)->dst, &sa->sco_bdaddr);
+ bacpy(&bt_sk(sk)->dst, &sa.sco_bdaddr);
+ sco_pi(sk)->pkt_type = sa.sco_pkt_type;
err = sco_connect(sk);
if (err)
@@ -622,6 +638,7 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len
bacpy(&sa->sco_bdaddr, &bt_sk(sk)->dst);
else
bacpy(&sa->sco_bdaddr, &bt_sk(sk)->src);
+ sa->sco_pkt_type = sco_pi(sk)->pkt_type;
return 0;
}
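Both sco_sock_bind() and sco_sock_connect() now copy the user sockaddr into a zeroed local struct through a length clamped with min_t(), instead of dereferencing the caller's pointer directly; a short address from user space can no longer be over-read, and any field it omits (such as the new sco_pkt_type) reads back as zero. The idiom in isolation (struct sockaddr_xxx is a stand-in for sockaddr_sco, not the real layout):

    /* Sketch of the copy-in idiom the bind()/connect() hunks adopt:
     * tolerate short sockaddrs instead of rejecting or over-reading. */
    #include <string.h>

    struct sockaddr_xxx {
            unsigned short family;
            unsigned char  addr[6];
            unsigned short pkt_type;
    };

    static void copy_in(struct sockaddr_xxx *dst, const void *uaddr, int alen)
    {
            size_t len = sizeof(*dst);

            if ((size_t)alen < len)
                    len = (size_t)alen;     /* min_t(unsigned int, ...) */
            memset(dst, 0, sizeof(*dst));   /* absent tail fields read as 0 */
            memcpy(dst, uaddr, len);        /* never past the caller's buffer */
    }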
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 967312803e41..239e0e84f9e6 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -41,11 +41,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
}
#endif
- u64_stats_update_begin(&brstats->syncp);
- brstats->tx_packets++;
- brstats->tx_bytes += skb->len;
- u64_stats_update_end(&brstats->syncp);
-
if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid))
goto out;
@@ -54,6 +49,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
+ u64_stats_update_begin(&brstats->syncp);
+ brstats->tx_packets++;
+ /* Exclude ETH_HLEN from byte stats for consistency with Rx chain */
+ brstats->tx_bytes += skb->len;
+ u64_stats_update_end(&brstats->syncp);
+
if (is_broadcast_ether_addr(dest))
br_flood_deliver(br, skb);
else if (is_multicast_ether_addr(dest)) {
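Relocating the brstats update below skb_pull(skb, ETH_HLEN) changes what tx_bytes measures: skb->len no longer includes the 14-byte Ethernet header, matching what the receive path counts. Illustrative arithmetic only:

    /* Illustrative only: what the relocated accounting sees for one
     * full-sized frame. */
    #define ETH_HLEN 14   /* Ethernet header: dst MAC + src MAC + ethertype */
    /* Frame entering br_dev_xmit(): 1514 bytes. Before the move,
     * tx_bytes grew by 1514; after skb_pull(skb, ETH_HLEN) it grows by
     * 1500, consistent with the Rx byte counters. */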
diff --git a/net/core/sock.c b/net/core/sock.c
index d743099250f4..7850d87c4a42 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -87,6 +87,8 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -139,6 +141,8 @@
#include <net/tcp.h>
#endif
+#include <linux/eventpoll.h>
+
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
@@ -2516,6 +2520,142 @@ void sk_common_release(struct sock *sk)
}
EXPORT_SYMBOL(sk_common_release);
+char *sk_get_waiting_task_cmdline(struct sock *sk, char *cmdline)
+{
+ bool softirq_enabled = false;
+ int res = 0;
+ unsigned int len;
+ char *program_name = cmdline;
+ struct task_struct *task = NULL;
+ struct mm_struct *mm = NULL;
+ static char *apk_path_prefix = "/data/data";
+ wait_queue_t *wq = NULL;
+ struct list_head *lh = NULL;
+ struct socket_wq *sk_wq = NULL;
+ wait_queue_func_t wait_func;
+ enum pid_type type;
+ struct pid *pid = NULL;
+ struct fown_struct *fown = NULL;
+ struct file *file;
+ int preempt_count;
+
+ *program_name = '\0';
+
+ if (!sk || !sk->sk_wq)
+ goto out;
+ lh = sk->sk_wq->wait.task_list.next;
+ if (!wq_has_sleeper(sk->sk_wq)) {
+ sk_wq = sk->sk_wq;
+ if (sk_wq->fasync_list && sk_wq->fasync_list->fa_file) {
+ fown = &sk_wq->fasync_list->fa_file->f_owner;
+ pid = fown->pid;
+ type = fown->pid_type;
+ do_each_pid_task(pid, type, task) {
+ if (task)
+ break;
+ } while_each_pid_task(pid, type, task);
+ }
+ } else {
+ lh = sk->sk_wq->wait.task_list.next;
+ wq = list_entry(lh, wait_queue_t, task_list);
+
+ wait_func = wq->func;
+ if (wait_func == pollwake)
+ task = ((struct poll_wqueues *)
+ (wq->private))->polling_task;
+ else if (wait_func == default_wake_function)
+ task = (struct task_struct *)(wq->private);
+ else if (wait_func == ep_poll_callback)
+ task = (struct task_struct *)(wq->private);
+ else if (wait_func == autoremove_wake_function)
+ task = (struct task_struct *)(wq->private);
+ else
+ pr_warning("Unknown wakeup:%p.\n", wait_func);
+
+ if (task)
+ task = get_thread_process(task);
+ }
+
+#ifdef CONFIG_EPOLL
+ if (!task) {
+ file = sk->sk_socket->file;
+ if (file)
+ task = get_epoll_file_task(file);
+ }
+#endif
+
+ if (!task && sk && sk->sk_socket)
+ task = SOCK_INODE(sk->sk_socket)->i_private;
+
+ if (!task) {
+ pr_err("Can't find a process for this sock.\n");
+ goto out;
+ }
+
+ mm = get_task_mm(task);
+ if (mm && mm->arg_end) {
+ len = mm->arg_end - mm->arg_start;
+
+ if (len > PAGE_SIZE)
+ len = PAGE_SIZE;
+
+ if (softirq_count()) {
+ softirq_enabled = true;
+ local_bh_enable();
+ }
+ if (preempt_count()) {
+ preempt_count = preempt_count();
+ preempt_count() = 0;
+ }
+
+ res = access_process_vm(task, mm->arg_start, cmdline, len, 0);
+
+ if (res > 0 && cmdline[res-1] != '\0' && len < PAGE_SIZE) {
+ len = strnlen(cmdline, res);
+ if (len < res) {
+ res = len;
+ } else {
+ len = mm->env_end - mm->env_start;
+ if (len > PAGE_SIZE - res)
+ len = PAGE_SIZE - res;
+ res += access_process_vm(task,
+ mm->env_start, cmdline+res, len, 0);
+ res = strnlen(cmdline, res);
+ }
+ }
+
+ if (preempt_count)
+ preempt_count() = preempt_count;
+ if (softirq_enabled)
+ local_bh_disable();
+
+ if (res > PAGE_SIZE)
+ cmdline[PAGE_SIZE-1] = '\0';
+
+ len = strlen(apk_path_prefix);
+ if (!strncmp(apk_path_prefix, program_name, len))
+ program_name += len;
+ else
+ program_name = strrchr(cmdline, '/');
+
+ if (program_name == NULL)
+ program_name = cmdline;
+ else
+ program_name++;
+ }
+
+ if (mm)
+ mmput(mm);
+
+ len = strlen(program_name);
+ snprintf(program_name + len, PAGE_SIZE-(program_name-cmdline)-len,
+ " %d %s", task->pid, task->comm);
+out:
+ return program_name;
+}
+EXPORT_SYMBOL(sk_get_waiting_task_cmdline);
+
+
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 089cb9f36387..5a9af0a9b0e6 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -15,6 +15,7 @@ obj-y := route.o inetpeer.o protocol.o \
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
+obj-$(CONFIG_SYSFS) += sysfs_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c4adc319cc2e..f022e0e97dc5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -119,6 +119,19 @@
#include <linux/mroute.h>
#endif
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+
+static inline int current_has_network(void)
+{
+ return in_egroup_p(AID_INET) || capable(CAP_NET_RAW);
+}
+#else
+static inline int current_has_network(void)
+{
+ return 1;
+}
+#endif
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
@@ -284,6 +297,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (!current_has_network())
+ return -EACCES;
+
if (unlikely(!inet_ehash_secret))
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
build_ehash_secret();
@@ -336,8 +352,7 @@ lookup_protocol:
}
err = -EPERM;
- if (sock->type == SOCK_RAW && !kern &&
- !ns_capable(net->user_ns, CAP_NET_RAW))
+ if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
@@ -905,6 +920,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFPFLAGS:
case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
+ case SIOCKILLADDR:
err = devinet_ioctl(net, cmd, (void __user *)arg);
break;
default:
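current_has_network() above is the whole paranoid-networking gate for IPv4: inet_create() succeeds only if the caller's effective groups include AID_INET (GID 3003 on Android) or the task holds CAP_NET_RAW; everyone else gets -EACCES before any protocol lookup happens. Seen from user space (illustrative demo, not kernel code):

    #include <errno.h>
    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
            /* On a kernel with CONFIG_ANDROID_PARANOID_NETWORK, a process
             * outside the AID_INET (3003) group and without CAP_NET_RAW
             * gets EACCES here instead of a socket. */
            int fd = socket(AF_INET, SOCK_DGRAM, 0);

            if (fd < 0 && errno == EACCES)
                    fprintf(stderr, "blocked by paranoid networking\n");
            return 0;
    }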
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e40eef4ac697..b151e0ac7f27 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -59,6 +59,7 @@
#include <net/arp.h>
#include <net/ip.h>
+#include <net/tcp.h>
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
@@ -918,6 +919,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFBRDADDR: /* Set the broadcast address */
case SIOCSIFDSTADDR: /* Set the destination address */
case SIOCSIFNETMASK: /* Set the netmask for the interface */
+ case SIOCKILLADDR: /* Nuke all sockets on this address */
ret = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto out;
@@ -969,7 +971,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
}
ret = -EADDRNOTAVAIL;
- if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
+ if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
+ && cmd != SIOCKILLADDR)
goto done;
switch (cmd) {
@@ -1096,6 +1099,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
inet_insert_ifa(ifa);
}
break;
+ case SIOCKILLADDR: /* Nuke all connections on this address */
+ ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
+ break;
}
done:
rtnl_unlock();
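The new SIOCKILLADDR case gives user space a way to forcibly reset every TCP connection bound to an address, via tcp_nuke_addr() (added elsewhere in this patch set); Android's connectivity code issues it when an interface loses its address so applications see errors immediately instead of hanging on dead sockets. A hedged sketch of a caller, assuming the SIOCKILLADDR number this patch set adds to its sockios headers:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <netinet/in.h>

    #ifndef SIOCKILLADDR
    #define SIOCKILLADDR 0x8939  /* assumption: value from the patch set */
    #endif

    /* Illustrative: reset all TCP sockets using the address carried in
     * *sin on device 'dev'. 'ctl' is any AF_INET socket fd. */
    static int kill_addr(int ctl, const char *dev,
                         const struct sockaddr_in *sin)
    {
            struct ifreq ifr;

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
            memcpy(&ifr.ifr_addr, sin, sizeof(*sin));
            return ioctl(ctl, SIOCKILLADDR, &ifr);
    }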
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 76e10b47e053..562efd91f457 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -939,7 +939,8 @@ error:
void icmp_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
- struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
+ int offset = iph->ihl<<2;
+ struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset);
int type = icmp_hdr(skb)->type;
int code = icmp_hdr(skb)->code;
struct net *net = dev_net(skb->dev);
@@ -949,7 +950,7 @@ void icmp_err(struct sk_buff *skb, u32 info)
* triggered by ICMP_ECHOREPLY which sent from kernel.
*/
if (icmph->type != ICMP_ECHOREPLY) {
- ping_err(skb, info);
+ ping_err(skb, offset, info);
return;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 23e6ab0a2dc0..d9c4f113d709 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -368,7 +368,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
/*
* Handle MSG_ERRQUEUE
*/
-int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
+int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
struct sock_exterr_skb *serr;
struct sk_buff *skb, *skb2;
@@ -405,7 +405,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
serr->addr_offset);
sin->sin_port = serr->port;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
- *addr_len = sizeof(*sin);
}
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index e7916c193932..00c2a3da3bdc 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -110,6 +110,18 @@ config IP_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
+config IP_NF_TARGET_REJECT_SKERR
+ bool "Force socket error when rejecting with icmp*"
+ depends on IP_NF_TARGET_REJECT
+ default n
+ help
+ This option enables turning a "--reject-with icmp*" into a matching
+ socket error also.
+ The REJECT target normally allows sending an ICMP message. But it
+ leaves the local socket unaware of any ingress rejects.
+
+ If unsure, say N.
+
config IP_NF_TARGET_ULOG
tristate "ULOG target support"
default m if NETFILTER_ADVANCED=n
@@ -237,7 +249,7 @@ config IP_NF_TARGET_CLUSTERIP
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
router/server/switch.
-
+
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_ECN
@@ -246,7 +258,7 @@ config IP_NF_TARGET_ECN
depends on NETFILTER_ADVANCED
---help---
This option adds a `ECN' target, which can be used in the iptables mangle
- table.
+ table.
You can use this target to remove the ECN bits from the IPv4 header of
an IP packet. This is particularly useful, if you need to work around
@@ -271,7 +283,7 @@ config IP_NF_RAW
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
and OUTPUT chains.
-
+
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
@@ -283,7 +295,7 @@ config IP_NF_SECURITY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
-
+
If unsure, say N.
endif # IP_NF_IPTABLES
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 04b18c1ac345..452e8a587c34 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -129,6 +129,14 @@ static void send_reset(struct sk_buff *oldskb, int hook)
static inline void send_unreach(struct sk_buff *skb_in, int code)
{
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+#ifdef CONFIG_IP_NF_TARGET_REJECT_SKERR
+ if (skb_in->sk) {
+ skb_in->sk->sk_err = icmp_err_convert[code].errno;
+ skb_in->sk->sk_error_report(skb_in->sk);
+ pr_debug("ipt_REJECT: sk_err=%d for skb=%p sk=%p\n",
+ skb_in->sk->sk_err, skb_in, skb_in->sk);
+ }
+#endif
}
static unsigned int
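With IP_NF_TARGET_REJECT_SKERR enabled, send_unreach() not only emits the ICMP error but also stamps the originating socket with the matching errno from icmp_err_convert[] and wakes it via sk_error_report(), so a locally generated packet that hits a REJECT rule fails fast instead of timing out. From the application side the pending error is visible through SO_ERROR (illustrative sketch):

    #include <stdio.h>
    #include <sys/socket.h>

    /* Illustrative: after a send is dropped by e.g.
     *   iptables -A OUTPUT -p udp --dport 9999 -j REJECT \
     *            --reject-with icmp-port-unreachable
     * the socket carries a pending errno (ECONNREFUSED in this case). */
    static void report(int sock)
    {
            int err = 0;
            socklen_t len = sizeof(err);

            /* SO_ERROR fetches and clears the pending socket error. */
            if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &err, &len) == 0)
                    fprintf(stderr, "pending socket error: %d\n", err);
    }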
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8cae28f5c3cf..7122472d24a6 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -33,7 +33,6 @@
#include <linux/netdevice.h>
#include <net/snmp.h>
#include <net/ip.h>
-#include <net/ipv6.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
@@ -46,8 +45,18 @@
#include <net/inet_common.h>
#include <net/checksum.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#endif
-static struct ping_table ping_table;
+
+struct ping_table ping_table;
+struct pingv6_ops pingv6_ops;
+EXPORT_SYMBOL_GPL(pingv6_ops);
static u16 ping_port_rover;
@@ -58,6 +67,7 @@ static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int ma
pr_debug("hash(%d) = %d\n", num, res);
return res;
}
+EXPORT_SYMBOL_GPL(ping_hash);
static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
struct net *net, unsigned int num)
@@ -65,7 +75,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
}
-static int ping_v4_get_port(struct sock *sk, unsigned short ident)
+int ping_get_port(struct sock *sk, unsigned short ident)
{
struct hlist_nulls_node *node;
struct hlist_nulls_head *hlist;
@@ -103,6 +113,10 @@ next_port:
ping_portaddr_for_each_entry(sk2, node, hlist) {
isk2 = inet_sk(sk2);
+ /* BUG? Why is this reuse and not reuseaddr? ping.c
+ * doesn't turn off SO_REUSEADDR, and it doesn't expect
+ * that other ping processes can steal its packets.
+ */
if ((isk2->inet_num == ident) &&
(sk2 != sk) &&
(!sk2->sk_reuse || !sk->sk_reuse))
@@ -125,17 +139,18 @@ fail:
write_unlock_bh(&ping_table.lock);
return 1;
}
+EXPORT_SYMBOL_GPL(ping_get_port);
-static void ping_v4_hash(struct sock *sk)
+void ping_hash(struct sock *sk)
{
- pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
+ pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
BUG(); /* "Please do not press this button again." */
}
-static void ping_v4_unhash(struct sock *sk)
+void ping_unhash(struct sock *sk)
{
struct inet_sock *isk = inet_sk(sk);
- pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
+ pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
if (sk_hashed(sk)) {
write_lock_bh(&ping_table.lock);
hlist_nulls_del(&sk->sk_nulls_node);
@@ -146,31 +161,61 @@ static void ping_v4_unhash(struct sock *sk)
write_unlock_bh(&ping_table.lock);
}
}
+EXPORT_SYMBOL_GPL(ping_unhash);
-static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr,
- u16 ident, int dif)
+static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
{
struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
+ int dif = skb->dev->ifindex;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
+ (int)ident, &ip_hdr(skb)->daddr, dif);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
+ (int)ident, &ipv6_hdr(skb)->daddr, dif);
+#endif
+ }
- pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
- (int)ident, &daddr, dif);
read_lock_bh(&ping_table.lock);
ping_portaddr_for_each_entry(sk, hnode, hslot) {
isk = inet_sk(sk);
- pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk,
- (int)isk->inet_num, &isk->inet_rcv_saddr,
- sk->sk_bound_dev_if);
-
pr_debug("iterate\n");
if (isk->inet_num != ident)
continue;
- if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
- continue;
+
+ if (skb->protocol == htons(ETH_P_IP) &&
+ sk->sk_family == AF_INET) {
+ pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk,
+ (int) isk->inet_num, &isk->inet_rcv_saddr,
+ sk->sk_bound_dev_if);
+
+ if (isk->inet_rcv_saddr &&
+ isk->inet_rcv_saddr != ip_hdr(skb)->daddr)
+ continue;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6) &&
+ sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
+ (int) isk->inet_num,
+ &inet6_sk(sk)->rcv_saddr,
+ sk->sk_bound_dev_if);
+
+ if (!ipv6_addr_any(&np->rcv_saddr) &&
+ !ipv6_addr_equal(&np->rcv_saddr,
+ &ipv6_hdr(skb)->daddr))
+ continue;
+#endif
+ }
+
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
@@ -200,33 +245,41 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
}
-static int ping_init_sock(struct sock *sk)
+int ping_init_sock(struct sock *sk)
{
struct net *net = sock_net(sk);
kgid_t group = current_egid();
- struct group_info *group_info = get_current_groups();
- int i, j, count = group_info->ngroups;
+ struct group_info *group_info;
+ int i, j, count;
kgid_t low, high;
+ int ret = 0;
inet_get_ping_group_range_net(net, &low, &high);
if (gid_lte(low, group) && gid_lte(group, high))
return 0;
+ group_info = get_current_groups();
+ count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
for (j = 0; j < cp_count; j++) {
kgid_t gid = group_info->blocks[i][j];
if (gid_lte(low, gid) && gid_lte(gid, high))
- return 0;
+ goto out_release_group;
}
count -= cp_count;
}
- return -EACCES;
+ ret = -EACCES;
+
+out_release_group:
+ put_group_info(group_info);
+ return ret;
}
+EXPORT_SYMBOL_GPL(ping_init_sock);
-static void ping_close(struct sock *sk, long timeout)
+void ping_close(struct sock *sk, long timeout)
{
pr_debug("ping_close(sk=%p,sk->num=%u)\n",
inet_sk(sk), inet_sk(sk)->inet_num);
@@ -234,36 +287,122 @@ static void ping_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
+EXPORT_SYMBOL_GPL(ping_close);
+
+/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
+int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
+ struct sockaddr *uaddr, int addr_len) {
+ struct net *net = sock_net(sk);
+ if (sk->sk_family == AF_INET) {
+ struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+ int chk_addr_ret;
+
+ if (addr_len < sizeof(*addr))
+ return -EINVAL;
+
+ pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
+ sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
+ chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+
+ if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+ chk_addr_ret = RTN_LOCAL;
+
+ if ((sysctl_ip_nonlocal_bind == 0 &&
+ isk->freebind == 0 && isk->transparent == 0 &&
+ chk_addr_ret != RTN_LOCAL) ||
+ chk_addr_ret == RTN_MULTICAST ||
+ chk_addr_ret == RTN_BROADCAST)
+ return -EADDRNOTAVAIL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
+ int addr_type, scoped, has_addr;
+ struct net_device *dev = NULL;
+
+ if (addr_len < sizeof(*addr))
+ return -EINVAL;
+
+ pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
+ sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
+
+ addr_type = ipv6_addr_type(&addr->sin6_addr);
+ scoped = __ipv6_addr_needs_scope_id(addr_type);
+ if ((addr_type != IPV6_ADDR_ANY &&
+ !(addr_type & IPV6_ADDR_UNICAST)) ||
+ (scoped && !addr->sin6_scope_id))
+ return -EINVAL;
+
+ rcu_read_lock();
+ if (addr->sin6_scope_id) {
+ dev = dev_get_by_index_rcu(net, addr->sin6_scope_id);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ }
+ has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
+ scoped);
+ rcu_read_unlock();
+
+ if (!(isk->freebind || isk->transparent || has_addr ||
+ addr_type == IPV6_ADDR_ANY))
+ return -EADDRNOTAVAIL;
+
+ if (scoped)
+ sk->sk_bound_dev_if = addr->sin6_scope_id;
+#endif
+ } else {
+ return -EAFNOSUPPORT;
+ }
+ return 0;
+}
+
+void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
+{
+ if (saddr->sa_family == AF_INET) {
+ struct inet_sock *isk = inet_sk(sk);
+ struct sockaddr_in *addr = (struct sockaddr_in *) saddr;
+ isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (saddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ np->rcv_saddr = np->saddr = addr->sin6_addr;
+#endif
+ }
+}
+
+void ping_clear_saddr(struct sock *sk, int dif)
+{
+ sk->sk_bound_dev_if = dif;
+ if (sk->sk_family == AF_INET) {
+ struct inet_sock *isk = inet_sk(sk);
+ isk->inet_rcv_saddr = isk->inet_saddr = 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
+ memset(&np->saddr, 0, sizeof(np->saddr));
+#endif
+ }
+}
/*
* We need our own bind because there are no privileged id's == local ports.
* Moreover, we don't allow binding to multi- and broadcast addresses.
*/
-static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct inet_sock *isk = inet_sk(sk);
unsigned short snum;
- int chk_addr_ret;
int err;
+ int dif = sk->sk_bound_dev_if;
- if (addr_len < sizeof(struct sockaddr_in))
- return -EINVAL;
-
- pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
- sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
-
- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
- if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
- chk_addr_ret = RTN_LOCAL;
-
- if ((sysctl_ip_nonlocal_bind == 0 &&
- isk->freebind == 0 && isk->transparent == 0 &&
- chk_addr_ret != RTN_LOCAL) ||
- chk_addr_ret == RTN_MULTICAST ||
- chk_addr_ret == RTN_BROADCAST)
- return -EADDRNOTAVAIL;
+ err = ping_check_bind_addr(sk, isk, uaddr, addr_len);
+ if (err)
+ return err;
lock_sock(sk);
@@ -272,42 +411,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
goto out;
err = -EADDRINUSE;
- isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
- snum = ntohs(addr->sin_port);
- if (ping_v4_get_port(sk, snum) != 0) {
- isk->inet_saddr = isk->inet_rcv_saddr = 0;
+ ping_set_saddr(sk, uaddr);
+ snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port);
+ if (ping_get_port(sk, snum) != 0) {
+ ping_clear_saddr(sk, dif);
goto out;
}
- pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n",
+ pr_debug("after bind(): num = %d, dif = %d\n",
(int)isk->inet_num,
- &isk->inet_rcv_saddr,
(int)sk->sk_bound_dev_if);
err = 0;
- if (isk->inet_rcv_saddr)
+ if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) ||
+ (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)))
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
isk->inet_sport = htons(isk->inet_num);
isk->inet_daddr = 0;
isk->inet_dport = 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr));
+#endif
+
sk_dst_reset(sk);
out:
release_sock(sk);
pr_debug("ping_v4_bind -> %d\n", err);
return err;
}
+EXPORT_SYMBOL_GPL(ping_bind);
/*
* Is this a supported type of ICMP message?
*/
-static inline int ping_supported(int type, int code)
+static inline int ping_supported(int family, int type, int code)
{
- if (type == ICMP_ECHO && code == 0)
- return 1;
- return 0;
+ return (family == AF_INET && type == ICMP_ECHO && code == 0) ||
+ (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0);
}
/*
@@ -315,30 +462,42 @@ static inline int ping_supported(int type, int code)
* sort of error condition.
*/
-static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-
-void ping_err(struct sk_buff *skb, u32 info)
+void ping_err(struct sk_buff *skb, int offset, u32 info)
{
- struct iphdr *iph = (struct iphdr *)skb->data;
- struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
+ int family;
+ struct icmphdr *icmph;
struct inet_sock *inet_sock;
- int type = icmp_hdr(skb)->type;
- int code = icmp_hdr(skb)->code;
+ int type;
+ int code;
struct net *net = dev_net(skb->dev);
struct sock *sk;
int harderr;
int err;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ family = AF_INET;
+ type = icmp_hdr(skb)->type;
+ code = icmp_hdr(skb)->code;
+ icmph = (struct icmphdr *)(skb->data + offset);
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ family = AF_INET6;
+ type = icmp6_hdr(skb)->icmp6_type;
+ code = icmp6_hdr(skb)->icmp6_code;
+ icmph = (struct icmphdr *) (skb->data + offset);
+ } else {
+ BUG();
+ }
+
/* We assume the packet has already been checked by icmp_unreach */
- if (!ping_supported(icmph->type, icmph->code))
+ if (!ping_supported(family, icmph->type, icmph->code))
return;
- pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
- code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
+ pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n",
+ skb->protocol, type, code, ntohs(icmph->un.echo.id),
+ ntohs(icmph->un.echo.sequence));
- sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
- ntohs(icmph->un.echo.id), skb->dev->ifindex);
+ sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
if (sk == NULL) {
pr_debug("no socket, dropping\n");
return; /* No socket for error */
@@ -349,72 +508,83 @@ void ping_err(struct sk_buff *skb, u32 info)
harderr = 0;
inet_sock = inet_sk(sk);
- switch (type) {
- default:
- case ICMP_TIME_EXCEEDED:
- err = EHOSTUNREACH;
- break;
- case ICMP_SOURCE_QUENCH:
- /* This is not a real error but ping wants to see it.
- * Report it with some fake errno. */
- err = EREMOTEIO;
- break;
- case ICMP_PARAMETERPROB:
- err = EPROTO;
- harderr = 1;
- break;
- case ICMP_DEST_UNREACH:
- if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
- ipv4_sk_update_pmtu(skb, sk, info);
- if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
- err = EMSGSIZE;
- harderr = 1;
- break;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ switch (type) {
+ default:
+ case ICMP_TIME_EXCEEDED:
+ err = EHOSTUNREACH;
+ break;
+ case ICMP_SOURCE_QUENCH:
+ /* This is not a real error but ping wants to see it.
+ * Report it with some fake errno.
+ */
+ err = EREMOTEIO;
+ break;
+ case ICMP_PARAMETERPROB:
+ err = EPROTO;
+ harderr = 1;
+ break;
+ case ICMP_DEST_UNREACH:
+ if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
+ ipv4_sk_update_pmtu(skb, sk, info);
+ if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
+ err = EMSGSIZE;
+ harderr = 1;
+ break;
+ }
+ goto out;
}
- goto out;
- }
- err = EHOSTUNREACH;
- if (code <= NR_ICMP_UNREACH) {
- harderr = icmp_err_convert[code].fatal;
- err = icmp_err_convert[code].errno;
+ err = EHOSTUNREACH;
+ if (code <= NR_ICMP_UNREACH) {
+ harderr = icmp_err_convert[code].fatal;
+ err = icmp_err_convert[code].errno;
+ }
+ break;
+ case ICMP_REDIRECT:
+ /* See ICMP_SOURCE_QUENCH */
+ ipv4_sk_redirect(skb, sk);
+ err = EREMOTEIO;
+ break;
}
- break;
- case ICMP_REDIRECT:
- /* See ICMP_SOURCE_QUENCH */
- ipv4_sk_redirect(skb, sk);
- err = EREMOTEIO;
- break;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ harderr = pingv6_ops.icmpv6_err_convert(type, code, &err);
+#endif
}
/*
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
- if (!inet_sock->recverr) {
+ if ((family == AF_INET && !inet_sock->recverr) ||
+ (family == AF_INET6 && !inet6_sk(sk)->recverr)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
- ip_icmp_error(sk, skb, err, 0 /* no remote port */,
- info, (u8 *)icmph);
+ if (family == AF_INET) {
+ ip_icmp_error(sk, skb, err, 0 /* no remote port */,
+ info, (u8 *)icmph);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ pingv6_ops.ipv6_icmp_error(sk, skb, err, 0,
+ info, (u8 *)icmph);
+#endif
+ }
}
sk->sk_err = err;
sk->sk_error_report(sk);
out:
sock_put(sk);
}
+EXPORT_SYMBOL_GPL(ping_err);
/*
- * Copy and checksum an ICMP Echo packet from user space into a buffer.
+ * Copy and checksum an ICMP Echo packet from user space into a buffer
+ * starting from the payload.
*/
-struct pingfakehdr {
- struct icmphdr icmph;
- struct iovec *iov;
- __wsum wcheck;
-};
-
-static int ping_getfrag(void *from, char *to,
- int offset, int fraglen, int odd, struct sk_buff *skb)
+int ping_getfrag(void *from, char *to,
+ int offset, int fraglen, int odd, struct sk_buff *skb)
{
struct pingfakehdr *pfh = (struct pingfakehdr *)from;
@@ -425,20 +595,33 @@ static int ping_getfrag(void *from, char *to,
pfh->iov, 0, fraglen - sizeof(struct icmphdr),
&pfh->wcheck))
return -EFAULT;
+ } else if (offset < sizeof(struct icmphdr)) {
+ BUG();
+ } else {
+ if (csum_partial_copy_fromiovecend
+ (to, pfh->iov, offset - sizeof(struct icmphdr),
+ fraglen, &pfh->wcheck))
+ return -EFAULT;
+ }
- return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ /* For IPv6, checksum each skb as we go along, as expected by
+ * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in
+ * wcheck, it will be finalized in ping_v4_push_pending_frames.
+ */
+ if (pfh->family == AF_INET6) {
+ skb->csum = pfh->wcheck;
+ skb->ip_summed = CHECKSUM_NONE;
+ pfh->wcheck = 0;
}
- if (offset < sizeof(struct icmphdr))
- BUG();
- if (csum_partial_copy_fromiovecend
- (to, pfh->iov, offset - sizeof(struct icmphdr),
- fraglen, &pfh->wcheck))
- return -EFAULT;
+#endif
+
return 0;
}
+EXPORT_SYMBOL_GPL(ping_getfrag);
-static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
- struct flowi4 *fl4)
+static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
+ struct flowi4 *fl4)
{
struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
@@ -450,24 +633,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
return ip_push_pending_frames(sk, fl4);
}
-static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len)
-{
- struct net *net = sock_net(sk);
- struct flowi4 fl4;
- struct inet_sock *inet = inet_sk(sk);
- struct ipcm_cookie ipc;
- struct icmphdr user_icmph;
- struct pingfakehdr pfh;
- struct rtable *rt = NULL;
- struct ip_options_data opt_copy;
- int free = 0;
- __be32 saddr, daddr, faddr;
- u8 tos;
- int err;
-
- pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
-
+int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
+ void *user_icmph, size_t icmph_len) {
+ u8 type, code;
if (len > 0xFFFF)
return -EMSGSIZE;
@@ -482,15 +650,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/*
* Fetch the ICMP header provided by the userland.
- * iovec is modified!
+ * iovec is modified! The ICMP header is consumed.
*/
-
- if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
- sizeof(struct icmphdr)))
+ if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len))
return -EFAULT;
- if (!ping_supported(user_icmph.type, user_icmph.code))
+
+ if (family == AF_INET) {
+ type = ((struct icmphdr *) user_icmph)->type;
+ code = ((struct icmphdr *) user_icmph)->code;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ type = ((struct icmp6hdr *) user_icmph)->icmp6_type;
+ code = ((struct icmp6hdr *) user_icmph)->icmp6_code;
+#endif
+ } else {
+ BUG();
+ }
+
+ if (!ping_supported(family, type, code))
return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ping_common_sendmsg);
+
+int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len)
+{
+ struct net *net = sock_net(sk);
+ struct flowi4 fl4;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipcm_cookie ipc;
+ struct icmphdr user_icmph;
+ struct pingfakehdr pfh;
+ struct rtable *rt = NULL;
+ struct ip_options_data opt_copy;
+ int free = 0;
+ __be32 saddr, daddr, faddr;
+ u8 tos;
+ int err;
+
+ pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+ err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph,
+ sizeof(user_icmph));
+ if (err)
+ return err;
+
/*
* Get and verify the address.
*/
@@ -595,13 +801,14 @@ back_from_confirm:
pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
pfh.iov = msg->msg_iov;
pfh.wcheck = 0;
+ pfh.family = AF_INET;
err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
0, &ipc, &rt, msg->msg_flags);
if (err)
ip_flush_pending_frames(sk);
else
- err = ping_push_pending_frames(sk, &pfh, &fl4);
+ err = ping_v4_push_pending_frames(sk, &pfh, &fl4);
release_sock(sk);
out:
@@ -622,10 +829,13 @@ do_confirm:
goto out;
}
-static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *isk = inet_sk(sk);
+ int family = sk->sk_family;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
struct sk_buff *skb;
int copied, err;
@@ -635,8 +845,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- if (flags & MSG_ERRQUEUE)
- return ip_recv_error(sk, msg, len, addr_len);
+ if (addr_len) {
+ if (family == AF_INET)
+ *addr_len = sizeof(*sin);
+ else if (family == AF_INET6 && addr_len)
+ *addr_len = sizeof(*sin6);
+ }
+
+ if (flags & MSG_ERRQUEUE) {
+ if (family == AF_INET) {
+ return ip_recv_error(sk, msg, len);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ return pingv6_ops.ipv6_recv_error(sk, msg, len);
+#endif
+ }
+ }
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
@@ -655,18 +879,44 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sock_recv_timestamp(msg, sk, skb);
- /* Copy the address. */
- if (msg->msg_name) {
- struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+ /* Copy the address and add cmsg data. */
+ if (family == AF_INET) {
+ sin = (struct sockaddr_in *) msg->msg_name;
+ if (sin) {
+ sin->sin_family = AF_INET;
+ sin->sin_port = 0 /* skb->h.uh->source */;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
+ memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ }
+
+ if (isk->cmsg_flags)
+ ip_cmsg_recv(msg, skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6hdr *ip6 = ipv6_hdr(skb);
+ sin6 = (struct sockaddr_in6 *) msg->msg_name;
+
+ if (sin6) {
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = 0;
+ sin6->sin6_addr = ip6->saddr;
+ sin6->sin6_flowinfo = 0;
+ if (np->sndflow)
+ sin6->sin6_flowinfo = ip6_flowinfo(ip6);
+ sin6->sin6_scope_id =
+ ipv6_iface_scope_id(&sin6->sin6_addr,
+ IP6CB(skb)->iif);
+ }
- sin->sin_family = AF_INET;
- sin->sin_port = 0 /* skb->h.uh->source */;
- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
- memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- *addr_len = sizeof(*sin);
+ if (inet6_sk(sk)->rxopt.all)
+ pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb);
+#endif
+ } else {
+ BUG();
}
- if (isk->cmsg_flags)
- ip_cmsg_recv(msg, skb);
+
err = copied;
done:
@@ -675,8 +925,9 @@ out:
pr_debug("ping_recvmsg -> %d\n", err);
return err;
}
+EXPORT_SYMBOL_GPL(ping_recvmsg);
-static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
@@ -687,6 +938,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
return 0;
}
+EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
/*
@@ -697,10 +949,7 @@ void ping_rcv(struct sk_buff *skb)
{
struct sock *sk;
struct net *net = dev_net(skb->dev);
- struct iphdr *iph = ip_hdr(skb);
struct icmphdr *icmph = icmp_hdr(skb);
- __be32 saddr = iph->saddr;
- __be32 daddr = iph->daddr;
/* We assume the packet has already been checked by icmp_rcv */
@@ -710,8 +959,7 @@ void ping_rcv(struct sk_buff *skb)
/* Push ICMP header back */
skb_push(skb, skb->data - (u8 *)icmph);
- sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
- skb->dev->ifindex);
+ sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
if (sk != NULL) {
pr_debug("rcv on socket %p\n", sk);
ping_queue_rcv_skb(sk, skb_get(skb));
@@ -722,6 +970,7 @@ void ping_rcv(struct sk_buff *skb)
/* We're called from icmp_rcv(). kfree_skb() is done there. */
}
+EXPORT_SYMBOL_GPL(ping_rcv);
struct proto ping_prot = {
.name = "PING",
@@ -732,14 +981,14 @@ struct proto ping_prot = {
.disconnect = udp_disconnect,
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
- .sendmsg = ping_sendmsg,
+ .sendmsg = ping_v4_sendmsg,
.recvmsg = ping_recvmsg,
.bind = ping_bind,
.backlog_rcv = ping_queue_rcv_skb,
.release_cb = ip4_datagram_release_cb,
- .hash = ping_v4_hash,
- .unhash = ping_v4_unhash,
- .get_port = ping_v4_get_port,
+ .hash = ping_hash,
+ .unhash = ping_unhash,
+ .get_port = ping_get_port,
.obj_size = sizeof(struct inet_sock),
};
EXPORT_SYMBOL(ping_prot);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 402870fdfa0e..448e5a77fa88 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -693,7 +693,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
goto out;
if (flags & MSG_ERRQUEUE) {
- err = ip_recv_error(sk, msg, len, addr_len);
+ err = ip_recv_error(sk, msg, len);
goto out;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 90b26beb84d4..4fca1aa4c505 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -138,6 +138,21 @@ static int ipv4_ping_group_range(ctl_table *table, int write,
return ret;
}
+/* Validate changes from /proc interface. */
+static int proc_tcp_default_init_rwnd(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int old_value = *(int *)ctl->data;
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ int new_value = *(int *)ctl->data;
+
+ if (write && ret == 0 && (new_value < 3 || new_value > 100))
+ *(int *)ctl->data = old_value;
+
+ return ret;
+}
+
static int proc_tcp_congestion_control(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -737,7 +752,7 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
- {
+ {
.procname = "tcp_thin_dupack",
.data = &sysctl_tcp_thin_dupack,
.maxlen = sizeof(int),
@@ -763,6 +778,13 @@ static struct ctl_table ipv4_table[] = {
.extra2 = &gso_max_segs,
},
{
+ .procname = "tcp_default_init_rwnd",
+ .data = &sysctl_tcp_default_init_rwnd,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tcp_default_init_rwnd
+ },
+ {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c
new file mode 100644
index 000000000000..0cbbf10026a6
--- /dev/null
+++ b/net/ipv4/sysfs_net_ipv4.c
@@ -0,0 +1,88 @@
+/*
+ * net/ipv4/sysfs_net_ipv4.c
+ *
+ * sysfs-based networking knobs (so we can, unlike with sysctl, control perms)
+ *
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * Robert Love <rlove@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <net/tcp.h>
+
+#define CREATE_IPV4_FILE(_name, _var) \
+static ssize_t _name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "%d\n", _var); \
+} \
+static ssize_t _name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ int val, ret; \
+ ret = sscanf(buf, "%d", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ if (val < 0) \
+ return -EINVAL; \
+ _var = val; \
+ return count; \
+} \
+static struct kobj_attribute _name##_attr = \
+ __ATTR(_name, 0644, _name##_show, _name##_store)
+
+CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]);
+CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]);
+CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]);
+
+CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]);
+CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]);
+CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]);
+
+static struct attribute *ipv4_attrs[] = {
+ &tcp_wmem_min_attr.attr,
+ &tcp_wmem_def_attr.attr,
+ &tcp_wmem_max_attr.attr,
+ &tcp_rmem_min_attr.attr,
+ &tcp_rmem_def_attr.attr,
+ &tcp_rmem_max_attr.attr,
+ NULL
+};
+
+static struct attribute_group ipv4_attr_group = {
+ .attrs = ipv4_attrs,
+};
+
+static __init int sysfs_ipv4_init(void)
+{
+ struct kobject *ipv4_kobject;
+ int ret;
+
+ ipv4_kobject = kobject_create_and_add("ipv4", kernel_kobj);
+ if (!ipv4_kobject)
+ return -ENOMEM;
+
+ ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group);
+ if (ret) {
+ kobject_put(ipv4_kobject);
+ return ret;
+ }
+
+ return 0;
+}
+
+subsys_initcall(sysfs_ipv4_init);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39bdb14b3214..82e3928aae20 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -268,12 +268,16 @@
#include <linux/crypto.h>
#include <linux/time.h>
#include <linux/slab.h>
+#include <linux/uid_stat.h>
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
#include <net/netdma.h>
#include <net/sock.h>
@@ -1239,6 +1243,9 @@ out:
if (copied)
tcp_push(sk, flags, mss_now, tp->nonagle);
release_sock(sk);
+
+ if (copied + copied_syn)
+ uid_stat_tcp_snd(current_uid(), copied + copied_syn);
return copied + copied_syn;
do_fault:
@@ -1543,6 +1550,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (copied > 0) {
tcp_recv_skb(sk, seq, &offset);
tcp_cleanup_rbuf(sk, copied);
+ uid_stat_tcp_rcv(current_uid(), copied);
}
return copied;
}
@@ -1947,6 +1955,9 @@ skip_copy:
tcp_cleanup_rbuf(sk, copied);
release_sock(sk);
+
+ if (copied > 0)
+ uid_stat_tcp_rcv(current_uid(), copied);
return copied;
out:
@@ -1955,6 +1966,8 @@ out:
recv_urg:
err = tcp_recv_urg(sk, msg, len, flags);
+ if (err > 0)
+ uid_stat_tcp_rcv(current_uid(), err);
goto out;
recv_sndq:
@@ -3467,3 +3480,107 @@ void __init tcp_init(void)
tcp_tasklet_init();
}
+
+static int tcp_is_local(struct net *net, __be32 addr)
+{
+ struct rtable *rt;
+ struct flowi4 fl4 = { .daddr = addr };
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR_OR_NULL(rt))
+ return 0;
+ return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK);
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static int tcp_is_local6(struct net *net, struct in6_addr *addr)
+{
+ struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0);
+ return rt6 && rt6->dst.dev && (rt6->dst.dev->flags & IFF_LOOPBACK);
+}
+#endif
+
+/*
+ * tcp_nuke_addr - destroy all sockets on the given local address.
+ * If the local address is the unspecified address (0.0.0.0 or ::), destroy
+ * all sockets with local addresses that are not configured.
+ */
+int tcp_nuke_addr(struct net *net, struct sockaddr *addr)
+{
+ int family = addr->sa_family;
+ unsigned int bucket;
+
+ struct in_addr *in;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct in6_addr *in6;
+#endif
+ if (family == AF_INET) {
+ in = &((struct sockaddr_in *)addr)->sin_addr;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ } else if (family == AF_INET6) {
+ in6 = &((struct sockaddr_in6 *)addr)->sin6_addr;
+#endif
+ } else {
+ return -EAFNOSUPPORT;
+ }
+
+ for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
+ struct hlist_nulls_node *node;
+ struct sock *sk;
+ spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
+
+restart:
+ spin_lock_bh(lock);
+ sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
+ continue;
+ if (sock_flag(sk, SOCK_DEAD))
+ continue;
+
+ if (family == AF_INET) {
+ __be32 s4 = inet->inet_rcv_saddr;
+ if (s4 == LOOPBACK4_IPV6)
+ continue;
+
+ if (in->s_addr != s4 &&
+ !(in->s_addr == INADDR_ANY &&
+ !tcp_is_local(net, s4)))
+ continue;
+ }
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ if (family == AF_INET6) {
+ struct in6_addr *s6;
+ if (!inet->pinet6)
+ continue;
+
+ s6 = &inet->pinet6->rcv_saddr;
+ if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED)
+ continue;
+
+ if (!ipv6_addr_equal(in6, s6) &&
+ !(ipv6_addr_equal(in6, &in6addr_any) &&
+ !tcp_is_local6(net, s6)))
+ continue;
+ }
+#endif
+
+ sock_hold(sk);
+ spin_unlock_bh(lock);
+
+ local_bh_disable();
+ bh_lock_sock(sk);
+ sk->sk_err = ETIMEDOUT;
+ sk->sk_error_report(sk);
+
+ tcp_done(sk);
+ bh_unlock_sock(sk);
+ local_bh_enable();
+ sock_put(sk);
+
+ goto restart;
+ }
+ spin_unlock_bh(lock);
+ }
+
+ return 0;
+}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e15d330919af..aa5f3bfebabd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -98,6 +98,7 @@ int sysctl_tcp_thin_dupack __read_mostly;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;
+int sysctl_tcp_default_init_rwnd __read_mostly = TCP_DEFAULT_INIT_RCVWND;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -351,14 +352,14 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
static void tcp_fixup_rcvbuf(struct sock *sk)
{
u32 mss = tcp_sk(sk)->advmss;
- u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
+ u32 icwnd = sysctl_tcp_default_init_rwnd;
int rcvmem;
/* Limit to 10 segments if mss <= 1460,
* or 14600/mss segments, with a minimum of two segments.
*/
if (mss > 1460)
- icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+ icwnd = max_t(u32, (1460 * icwnd) / mss, 2);
rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
while (tcp_win_from_space(rcvmem) < mss)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5d87806d3ade..bc172b836ed6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -48,6 +48,8 @@
* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
* Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
* a single port at the same time.
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
*/
#define pr_fmt(fmt) "TCP: " fmt
@@ -2661,6 +2663,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
__u16 destp = ntohs(inet->inet_dport);
__u16 srcp = ntohs(inet->inet_sport);
int rx_queue;
+ unsigned long cmdline = __get_free_page(GFP_TEMPORARY);
+ if (cmdline == NULL)
+ return;
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
@@ -2687,7 +2692,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
- "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
+ "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d %s%n",
i, src, srcp, dest, destp, sk->sk_state,
tp->write_seq - tp->snd_una,
rx_queue,
@@ -2705,7 +2710,10 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
sk->sk_state == TCP_LISTEN ?
(fastopenq ? fastopenq->max_qlen : 0) :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
+ sk_get_waiting_task_cmdline(sk, cmdline),
len);
+
+ free_page(cmdline);
}
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
@@ -2736,9 +2744,10 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "%-*s\n", TMPSZ - 1,
- " sl local_address rem_address st tx_queue "
- "rx_queue tr tm->when retrnsmt uid timeout "
- "inode");
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout "
+ "inode "
+ "cmdline");
goto out;
}
st = seq->private;
@@ -2755,7 +2764,7 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
get_timewait4_sock(v, seq, st->num, &len);
break;
}
- seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
+ seq_printf(seq, "\n");
out:
return 0;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4a4e8746d1b2..9e171160f485 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -231,14 +231,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
}
/* Set initial window to a value enough for senders starting with
- * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
+ * initial congestion window of sysctl_tcp_default_init_rwnd. Place
* a limit on the initial window when mss is larger than 1460.
*/
if (mss > (1 << *rcv_wscale)) {
- int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
+ int init_cwnd = sysctl_tcp_default_init_rwnd;
if (mss > 1460)
- init_cwnd =
- max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+ init_cwnd = max_t(u32, (1460 * init_cwnd) / mss, 2);
/* when initializing use the value from init_rcv_wnd
* rather than the default from above
*/
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c3075b552248..313f1f73bdbe 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -75,6 +75,8 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
*/
#define pr_fmt(fmt) "UDP: " fmt
@@ -1211,7 +1213,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
bool slow;
if (flags & MSG_ERRQUEUE)
- return ip_recv_error(sk, msg, len, addr_len);
+ return ip_recv_error(sk, msg, len);
try_again:
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
@@ -2148,9 +2150,12 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
__u16 srcp = ntohs(inet->inet_sport);
+ unsigned long cmdline = __get_free_page(GFP_TEMPORARY);
+ if (cmdline == NULL)
+ return;
seq_printf(f, "%5d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d %s%n",
bucket, src, srcp, dest, destp, sp->sk_state,
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
@@ -2158,7 +2163,9 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
0, sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp,
- atomic_read(&sp->sk_drops), len);
+ atomic_read(&sp->sk_drops),
+ sk_get_waiting_task_cmdline(sp, cmdline), len);
+ free_page(cmdline);
}
int udp4_seq_show(struct seq_file *seq, void *v)
@@ -2167,13 +2174,13 @@ int udp4_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%-127s\n",
" sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
- "inode ref pointer drops");
+ "inode ref pointer drops cmdline");
else {
struct udp_iter_state *state = seq->private;
int len;
udp4_format_sock(v, seq, state->bucket, &len);
- seq_printf(seq, "%*s\n", 127 - len, "");
+ seq_printf(seq, "\n");
}
return 0;
}
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 9af088d2cdaa..470a9c008e9b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
- raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ab5c7ad482cd..a4cfde67fcb7 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,6 +49,7 @@
#include <net/udp.h>
#include <net/udplite.h>
#include <net/tcp.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/inet_common.h>
#include <net/route.h>
@@ -62,6 +63,20 @@
#include <asm/uaccess.h>
#include <linux/mroute6.h>
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+
+static inline int current_has_network(void)
+{
+ return in_egroup_p(AID_INET) || capable(CAP_NET_RAW);
+}
+#else
+static inline int current_has_network(void)
+{
+ return 1;
+}
+#endif
+
MODULE_AUTHOR("Cast of dozens");
MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
MODULE_LICENSE("GPL");
@@ -108,6 +123,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (!current_has_network())
+ return -EACCES;
+
if (sock->type != SOCK_RAW &&
sock->type != SOCK_DGRAM &&
!inet_ehash_secret)
@@ -159,8 +177,7 @@ lookup_protocol:
}
err = -EPERM;
- if (sock->type == SOCK_RAW && !kern &&
- !ns_capable(net->user_ns, CAP_NET_RAW))
+ if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
@@ -477,6 +494,21 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
EXPORT_SYMBOL(inet6_getname);
+int inet6_killaddr_ioctl(struct net *net, void __user *arg)
+{
+ struct in6_ifreq ireq;
+ struct sockaddr_in6 sin6;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EACCES;
+
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ return -EFAULT;
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = ireq.ifr6_addr;
+ return tcp_nuke_addr(net, (struct sockaddr *) &sin6);
+}
+
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
@@ -500,6 +532,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return addrconf_del_ifaddr(net, (void __user *) arg);
case SIOCSIFDSTADDR:
return addrconf_set_dstaddr(net, (void __user *) arg);
+ case SIOCKILLADDR:
+ return inet6_killaddr_ioctl(net, (void __user *) arg);
default:
if (!sk->sk_prot->ioctl)
return -ENOIOCTLCMD;
@@ -840,6 +874,9 @@ static int __init inet6_init(void)
if (err)
goto out_unregister_udplite_proto;
+ err = proto_register(&pingv6_prot, 1);
+ if (err)
+ goto out_unregister_ping_proto;
/* We MUST register RAW sockets before we create the ICMP6,
* IGMP6, or NDISC control sockets.
@@ -930,6 +967,10 @@ static int __init inet6_init(void)
if (err)
goto ipv6_packet_fail;
+ err = pingv6_init();
+ if (err)
+ goto pingv6_fail;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -942,6 +983,8 @@ out:
sysctl_fail:
ipv6_packet_cleanup();
#endif
+pingv6_fail:
+ pingv6_exit();
ipv6_packet_fail:
tcpv6_exit();
tcpv6_fail:
@@ -985,6 +1028,8 @@ register_pernet_fail:
rtnl_unregister_all(PF_INET6);
out_sock_register_fail:
rawv6_exit();
+out_unregister_ping_proto:
+ proto_unregister(&pingv6_prot);
out_unregister_raw_proto:
proto_unregister(&rawv6_prot);
out_unregister_udplite_proto:
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 51af9d0d019a..11de7379fb9a 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -166,15 +166,15 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv);
* to explore inner IPv6 header, eg. ICMPv6 error messages.
*
* If target header is found, its offset is set in *offset and return protocol
- * number. Otherwise, return -1.
+ * number. Otherwise, return -ENOENT or -EBADMSG.
*
* If the first fragment doesn't contain the final protocol header or
* NEXTHDR_NONE it is considered invalid.
*
* Note that non-1st fragment is special case that "the protocol number
* of last header" is "next header" field in Fragment header. In this case,
- * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
- * isn't NULL.
+ * *offset is meaningless. If fragoff is not NULL, the fragment offset is
+ * stored in *fragoff; if it is NULL, return -EINVAL.
*
* if flags is not NULL and it's a fragment, then the frag flag
* IP6_FH_F_FRAG will be set. If it's an AH header, the
@@ -253,9 +253,12 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
if (target < 0 &&
((!ipv6_ext_hdr(hp->nexthdr)) ||
hp->nexthdr == NEXTHDR_NONE)) {
- if (fragoff)
+ if (fragoff) {
*fragoff = _frag_off;
- return hp->nexthdr;
+ return hp->nexthdr;
+ } else {
+ return -EINVAL;
+ }
}
return -ENOENT;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2dee1d9d7305..cc368c5bc437 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -57,6 +57,7 @@
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
@@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net)
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
+ /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
+ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
struct net *net = dev_net(skb->dev);
if (type == ICMPV6_PKT_TOOBIG)
ip6_update_pmtu(skb, net, info, 0, 0);
else if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, 0, 0);
+
+ if (!(type & ICMPV6_INFOMSG_MASK))
+ if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
+ ping_err(skb, offset, info);
}
static int icmpv6_rcv(struct sk_buff *skb);
@@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
return (*op & 0xC0) == 0x80;
}
-static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len)
{
struct sk_buff *skb;
struct icmp6hdr *icmp6h;
@@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
-static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
- struct sock *sk, struct flowi6 *fl6)
+struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
+ struct sock *sk, struct flowi6 *fl6)
{
struct dst_entry *dst, *dst2;
struct flowi6 fl2;
@@ -697,7 +705,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
IPPROTO_ICMPV6, 0));
if (__skb_checksum_complete(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
saddr, daddr);
goto csum_error;
}
@@ -718,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_ECHO_REPLY:
- /* we couldn't care less */
+ ping_rcv(skb);
break;
case ICMPV6_PKT_TOOBIG:
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4433ab40e7de..7f45af3e8128 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -153,6 +153,18 @@ config IP6_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_TARGET_REJECT_SKERR
+ bool "Force socket error when rejecting with icmp*"
+ depends on IP6_NF_TARGET_REJECT
+ default n
+ help
+ This option additionally turns a "--reject-with icmp*" reject into
+ a matching error on the local socket.
+ The REJECT target normally only sends an ICMP message and leaves
+ the local socket unaware of any ingress rejects.
+
+ If unsure, say N.
+
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 70f9abc0efe9..573c232239b0 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -180,6 +180,15 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
skb_in->dev = net->loopback_dev;
icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
+#ifdef CONFIG_IP6_NF_TARGET_REJECT_SKERR
+ if (skb_in->sk) {
+ icmpv6_err_convert(ICMPV6_DEST_UNREACH, code,
+ &skb_in->sk->sk_err);
+ skb_in->sk->sk_error_report(skb_in->sk);
+ pr_debug("ip6t_REJECT: sk_err=%d for skb=%p sk=%p\n",
+ skb_in->sk->sk_err, skb_in, skb_in->sk);
+ }
+#endif
}
static unsigned int
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
new file mode 100644
index 000000000000..858788da5a2f
--- /dev/null
+++ b/net/ipv6/ping.c
@@ -0,0 +1,221 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * "Ping" sockets
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Based on ipv4/ping.c code.
+ *
+ * Authors: Lorenzo Colitti (IPv6 support)
+ * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
+ * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
+ *
+ */
+
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/transp_v6.h>
+#include <net/ping.h>
+
+struct proto pingv6_prot = {
+ .name = "PINGv6",
+ .owner = THIS_MODULE,
+ .init = ping_init_sock,
+ .close = ping_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .sendmsg = ping_v6_sendmsg,
+ .recvmsg = ping_recvmsg,
+ .bind = ping_bind,
+ .backlog_rcv = ping_queue_rcv_skb,
+ .hash = ping_hash,
+ .unhash = ping_unhash,
+ .get_port = ping_get_port,
+ .obj_size = sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+static struct inet_protosw pingv6_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_ICMPV6,
+ .prot = &pingv6_prot,
+ .ops = &inet6_dgram_ops,
+ .no_check = UDP_CSUM_DEFAULT,
+ .flags = INET_PROTOSW_REUSE,
+};
+
+
+/* Compatibility glue so we can support IPv6 when it's compiled as a module */
+int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+ return -EAFNOSUPPORT;
+}
+int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ return -EAFNOSUPPORT;
+}
+int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
+{
+ return -EAFNOSUPPORT;
+}
+void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload) {}
+int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev, int strict)
+{
+ return 0;
+}
+
+int __init pingv6_init(void)
+{
+ pingv6_ops.ipv6_recv_error = ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
+ return inet6_register_protosw(&pingv6_protosw);
+}
+
+/* This never gets called because it's not possible to unload the ipv6 module,
+ * but just in case.
+ */
+void pingv6_exit(void)
+{
+ pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
+ inet6_unregister_protosw(&pingv6_protosw);
+}
+
+int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct icmp6hdr user_icmph;
+ int addr_type;
+ struct in6_addr *daddr;
+ int iif = 0;
+ struct flowi6 fl6;
+ int err;
+ int hlimit;
+ struct dst_entry *dst;
+ struct rt6_info *rt;
+ struct pingfakehdr pfh;
+
+ pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+ err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
+ sizeof(user_icmph));
+ if (err)
+ return err;
+
+ if (msg->msg_name) {
+ struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name;
+ if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
+ u->sin6_family != AF_INET6) {
+ return -EINVAL;
+ }
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != u->sin6_scope_id) {
+ return -EINVAL;
+ }
+ daddr = &(u->sin6_addr);
+ iif = u->sin6_scope_id;
+ } else {
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -EDESTADDRREQ;
+ daddr = &np->daddr;
+ }
+
+ if (!iif)
+ iif = sk->sk_bound_dev_if;
+
+ addr_type = ipv6_addr_type(daddr);
+ if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
+ return -EINVAL;
+ if (addr_type & IPV6_ADDR_MAPPED)
+ return -EINVAL;
+
+ /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+
+ memset(&fl6, 0, sizeof(fl6));
+
+ fl6.flowi6_proto = IPPROTO_ICMPV6;
+ fl6.saddr = np->saddr;
+ fl6.daddr = *daddr;
+ fl6.fl6_icmp_type = user_icmph.icmp6_type;
+ fl6.fl6_icmp_code = user_icmph.icmp6_code;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+ rt = (struct rt6_info *) dst;
+
+ np = inet6_sk(sk);
+ if (!np)
+ return -EBADF;
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ pfh.icmph.type = user_icmph.icmp6_type;
+ pfh.icmph.code = user_icmph.icmp6_code;
+ pfh.icmph.checksum = 0;
+ pfh.icmph.un.echo.id = inet->inet_sport;
+ pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
+ pfh.iov = msg->msg_iov;
+ pfh.wcheck = 0;
+ pfh.family = AF_INET6;
+
+ if (ipv6_addr_is_multicast(&fl6.daddr))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ip6_dst_hoplimit(dst);
+
+ lock_sock(sk);
+ err = ip6_append_data(sk, ping_getfrag, &pfh, len,
+ 0, hlimit,
+ np->tclass, NULL, &fl6, rt,
+ MSG_DONTWAIT, np->dontfrag);
+
+ if (err) {
+ ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
+ ICMP6_MIB_OUTERRORS);
+ ip6_flush_pending_frames(sk);
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6,
+ (struct icmp6hdr *) &pfh.icmph,
+ len);
+ }
+ release_sock(sk);
+
+ if (err)
+ return err;
+
+ return len;
+}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 66c718854e5a..151535484a03 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -21,6 +21,8 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
*/
#include <linux/bottom_half.h>
@@ -1759,6 +1761,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
const struct ipv6_pinfo *np = inet6_sk(sp);
+ unsigned long cmdline = __get_free_page(GFP_TEMPORARY);
+ if (cmdline == NULL)
+ return;
dest = &np->daddr;
src = &np->rcv_saddr;
@@ -1781,7 +1786,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d %s\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1801,8 +1806,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+ tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
+ sk_get_waiting_task_cmdline(sp, cmdline)
);
+ free_page(cmdline);
}
static void get_timewait6_sock(struct seq_file *seq,
@@ -1841,7 +1848,7 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
"local_address "
"remote_address "
"st tx_queue rx_queue tr tm->when retrnsmt"
- " uid timeout inode\n");
+ " uid timeout inode cmdline\n");
goto out;
}
st = seq->private;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6b298dc614e3..99636a77cdfc 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -19,6 +19,8 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
*/
#include <linux/errno.h>
@@ -1368,6 +1370,9 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
struct ipv6_pinfo *np = inet6_sk(sp);
const struct in6_addr *dest, *src;
__u16 destp, srcp;
+ unsigned long cmdline = __get_free_page(GFP_TEMPORARY);
+ if (cmdline == NULL)
+ return;
dest = &np->daddr;
src = &np->rcv_saddr;
@@ -1375,7 +1380,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
srcp = ntohs(inet->inet_sport);
seq_printf(seq,
"%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d %s\n",
bucket,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1389,7 +1394,9 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
0,
sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp,
- atomic_read(&sp->sk_drops));
+ atomic_read(&sp->sk_drops),
+ sk_get_waiting_task_cmdline(sp, cmdline));
+ free_page(cmdline);
}
int udp6_seq_show(struct seq_file *seq, void *v)
@@ -1400,7 +1407,7 @@ int udp6_seq_show(struct seq_file *seq, void *v)
"local_address "
"remote_address "
"st tx_queue rx_queue tr tm->when retrnsmt"
- " uid timeout inode ref pointer drops\n");
+ " uid timeout inode ref pointer drops cmdline\n");
else
udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
return 0;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 62535fe9f570..3d533f1a1f27 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -291,3 +291,12 @@ config MAC80211_DEBUG_COUNTERS
and show them in debugfs.
If unsure, say N.
+
+config MAC80211_SCAN_ABORT
+ bool "Support to abort a scan request on tx"
+ depends on MAC80211
+ depends on WIRELESS
+ default n
+ ---help---
+ Add support for aborting scan requests on tx.
+ This is intended only for Chrome OS; enabling it interferes with
+ Android P2P.
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 92ef04c72c51..8b7a15383108 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -866,6 +866,8 @@ enum {
* send out data
* @SCAN_RESUME: Resume the scan and scan the next channel
* @SCAN_ABORT: Abort the scan and go back to operating channel
+ * @SCAN_SUSPEND_ABORT: Return to the operating channel then
+ * terminate the scan operation.
*/
enum mac80211_scan_state {
SCAN_DECISION,
@@ -874,6 +876,7 @@ enum mac80211_scan_state {
SCAN_SUSPEND,
SCAN_RESUME,
SCAN_ABORT,
+ SCAN_SUSPEND_ABORT,
};
struct ieee80211_local {
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index eb03337b6545..e1ae2388e2b4 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -659,9 +659,20 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local,
/* disable PS */
ieee80211_offchannel_return(local);
+#ifndef CONFIG_MAC80211_SCAN_ABORT
*next_delay = HZ / 5;
/* afterwards, resume scan & go to next channel */
local->next_scan_state = SCAN_RESUME;
+#else
+ if (local->next_scan_state == SCAN_SUSPEND) {
+ *next_delay = HZ / 5;
+ /* afterwards, resume scan & go to next channel */
+ local->next_scan_state = SCAN_RESUME;
+ } else {
+ *next_delay = 0;
+ local->next_scan_state = SCAN_ABORT;
+ }
+#endif
}
static void ieee80211_scan_state_resume(struct ieee80211_local *local,
@@ -760,6 +771,9 @@ void ieee80211_scan_work(struct work_struct *work)
ieee80211_scan_state_send_probe(local, &next_delay);
break;
case SCAN_SUSPEND:
+#ifdef CONFIG_MAC80211_SCAN_ABORT
+ case SCAN_SUSPEND_ABORT:
+#endif
ieee80211_scan_state_suspend(local, &next_delay);
break;
case SCAN_RESUME:
diff --git a/net/mhi/Kconfig b/net/mhi/Kconfig
new file mode 100644
index 000000000000..7f4e05c086e4
--- /dev/null
+++ b/net/mhi/Kconfig
@@ -0,0 +1,91 @@
+#
+# MHI protocol family and drivers
+#
+
+config MHI
+ bool "Modem-Host Interface"
+ default n
+ help
+ The Modem-Host Interface (MHI) is a packet-oriented transport protocol
+ developed by Renesas Mobile for use with their modems.
+
+ If unsure, say N.
+
+
+if MHI
+
+config MHI_L2MUX
+ tristate "L2 MUX Protocol Layer for MHI"
+ default y
+ help
+ L2 MUX is a protocol layer in the MHI stack. It is required
+ by the MHI L3 components.
+
+ To compile this driver as a module, choose M here: the module
+ will be called l2mux. If unsure, say Y.
+
+config MHI_L3MHI
+ tristate "L3 MHI Protocol Family (AF_MHI)"
+ select MHI_L2MUX
+ default y
+ help
+ AF_MHI provides datagram access to L2 channels in MHI,
+ developed by Renesas Mobile for use with their modems.
+
+ To compile this driver as a module, choose M here: the modules
+ will be called l3mhi and af_mhi. If unsure, say Y.
+
+config MHI_L3PHONET
+ tristate "L3 PHONET Protocol bridge (AF_PHONET)"
+ select MHI_L2MUX
+ select PHONET
+ default y
+ help
+ L3 PHONET protocol for MHI protocol family,
+ developed by Renesas Mobile for use with their modems.
+
+ This driver is a bridge between MHI L3 Phonet and Phonet Protocol Family.
+
+ To compile this driver as a module, choose M here: the module
+ will be called l3phonet. If unsure, say Y.
+
+config MHI_L3MHDP
+ tristate "L3 MHDP IP Tunneling Protocol"
+ select MHI_L2MUX
+ select INET_TUNNEL
+ default y
+ help
+ Tunneling means encapsulating data of one protocol type within
+ another protocol and sending it over a channel that understands the
+ encapsulating protocol. This particular tunneling driver implements
+ encapsulation of IP within MHDP (Modem Host Data Protocol), which
+ is used for communication between the APE and the Modem.
+
+ To compile this driver as a module, choose M here: the module
+ will be called l3mhdp. If unsure, say Y.
+
+
+config MHI_DEBUG
+ bool "MHI Debugging"
+ default n
+ help
+ Generate lots of debugging messages in the MHI stack.
+ This option is useful when developing MHI.
+ Otherwise it should be off.
+
+ If unsure, say N.
+
+config MHI_DUMP_FRAMES
+ bool "Dump MHI frames on L2 layer"
+ default n
+ help
+ Print out every frame passed through L2MUX into kernel log.
+ This option is useful when developing MHI. Otherwise it should be off.
+
+ If unsure, say N.
+
+config MHDP_BONDING_SUPPORT
+ bool "use mhdp as a bonding slave"
+ default n
+endif
+
diff --git a/net/mhi/Makefile b/net/mhi/Makefile
new file mode 100644
index 000000000000..64a2899fa4b0
--- /dev/null
+++ b/net/mhi/Makefile
@@ -0,0 +1,10 @@
+
+obj-$(CONFIG_MHI_L3MHI) += af_mhi.o
+
+af_mhi-objs := mhi_proto.o mhi_socket.o mhi_dgram.o mhi_raw.o
+
+obj-$(CONFIG_MHI_L2MUX) += l2mux.o
+obj-$(CONFIG_MHI_L3MHI) += l3mhi.o
+obj-$(CONFIG_MHI_L3MHDP) += l3mhdp.o
+obj-$(CONFIG_MHI_L3PHONET) += l3phonet.o
+
diff --git a/net/mhi/l2mux.c b/net/mhi/l2mux.c
new file mode 100644
index 000000000000..b2bb0e4522e6
--- /dev/null
+++ b/net/mhi/l2mux.c
@@ -0,0 +1,280 @@
+/*
+ * File: l2mux.c
+ *
+ * Modem-Host Interface (MHI) L2MUX layer
+ *
+ * Copyright (C) 2011 Renesas Mobile Corporation. All rights reserved.
+ *
+ * Author: Petri Mattila <petri.to.mattila@renesasmobile.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/if_mhi.h>
+#include <linux/mhi.h>
+#include <linux/l2mux.h>
+
+#include <net/af_mhi.h>
+
+#ifdef CONFIG_MHI_DEBUG
+# define DPRINTK(...) printk(KERN_DEBUG "MHI/L2MUX: " __VA_ARGS__)
+#else
+# define DPRINTK(...)
+#endif
+
+
+/* Handle ONLY Non DIX types 0x00-0xff */
+#define ETH_NON_DIX_NPROTO 0x0100
+
+
+/* L2MUX master lock */
+static DEFINE_SPINLOCK(l2mux_lock);
+
+/* L3 ID -> RX function table */
+static l2mux_skb_fn *l2mux_id2rx_tab[MHI_L3_NPROTO] __read_mostly;
+
+/* Packet Type -> TX function table */
+static l2mux_skb_fn *l2mux_pt2tx_tab[ETH_NON_DIX_NPROTO] __read_mostly;
+
+
+int l2mux_netif_rx_register(int l3, l2mux_skb_fn *fn)
+{
+ int err = 0;
+
+ DPRINTK("l2mux_netif_rx_register(l3:%d, fn:%p)\n", l3, fn);
+
+ if (l3 < 0 || l3 >= MHI_L3_NPROTO)
+ return -EINVAL;
+
+ if (!fn)
+ return -EINVAL;
+
+ spin_lock(&l2mux_lock);
+ {
+ if (l2mux_id2rx_tab[l3] == NULL)
+ l2mux_id2rx_tab[l3] = fn;
+ else
+ err = -EBUSY;
+ }
+ spin_unlock(&l2mux_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(l2mux_netif_rx_register);
+
+int l2mux_netif_rx_unregister(int l3)
+{
+ int err = 0;
+
+ DPRINTK("l2mux_netif_rx_unregister(l3:%d)\n", l3);
+
+ if (l3 < 0 || l3 >= MHI_L3_NPROTO)
+ return -EINVAL;
+
+ spin_lock(&l2mux_lock);
+ {
+ if (l2mux_id2rx_tab[l3])
+ l2mux_id2rx_tab[l3] = NULL;
+ else
+ err = -EPROTONOSUPPORT;
+ }
+ spin_unlock(&l2mux_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(l2mux_netif_rx_unregister);
+
+int l2mux_netif_tx_register(int pt, l2mux_skb_fn *fn)
+{
+ int err = 0;
+
+ DPRINTK("l2mux_netif_tx_register(pt:%d, fn:%p)\n", pt, fn);
+
+ if (pt <= 0 || pt >= ETH_NON_DIX_NPROTO)
+ return -EINVAL;
+
+ if (!fn)
+ return -EINVAL;
+
+ spin_lock(&l2mux_lock);
+ {
+ if (l2mux_pt2tx_tab[pt] == NULL)
+ l2mux_pt2tx_tab[pt] = fn;
+ else
+ err = -EBUSY;
+ }
+ spin_unlock(&l2mux_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(l2mux_netif_tx_register);
+
+int l2mux_netif_tx_unregister(int pt)
+{
+ int err = 0;
+
+ DPRINTK("l2mux_netif_tx_unregister(pt:%d)\n", pt);
+
+ if (pt <= 0 || pt >= ETH_NON_DIX_NPROTO)
+ return -EINVAL;
+
+ spin_lock(&l2mux_lock);
+ {
+ if (l2mux_pt2tx_tab[pt])
+ l2mux_pt2tx_tab[pt] = NULL;
+ else
+ err = -EPROTONOSUPPORT;
+ }
+ spin_unlock(&l2mux_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(l2mux_netif_tx_unregister);
+
+int l2mux_skb_rx(struct sk_buff *skb, struct net_device *dev)
+{
+ struct l2muxhdr *l2hdr;
+ unsigned l3pid;
+ unsigned l3len;
+ l2mux_skb_fn *rxfn;
+
+ /* Set the device in the skb */
+ skb->dev = dev;
+
+ /* Set MAC header here */
+ skb_reset_mac_header(skb);
+
+ /* L2MUX header */
+ l2hdr = l2mux_hdr(skb);
+
+ /* proto id and length in L2 header */
+ l3pid = l2mux_get_proto(l2hdr);
+ l3len = l2mux_get_length(l2hdr);
+
+ DPRINTK("L2MUX: RX dev:%d skb_len:%d l3_len:%d l3_pid:%d\n",
+ skb->dev->ifindex, skb->len, l3len, l3pid);
+
+#ifdef CONFIG_MHI_DUMP_FRAMES
+ {
+ u8 *ptr = skb->data;
+ int len = skb_headlen(skb);
+ int i;
+
+ printk(KERN_DEBUG "L2MUX: RX dev:%d skb_len:%d l3_len:%d l3_pid:%d\n",
+ dev->ifindex, skb->len, l3len, l3pid);
+
+ for (i = 0; i < len; i++) {
+ if (i%8 == 0)
+ printk(KERN_DEBUG "L2MUX: RX [%04X] ", i);
+ printk(" 0x%02X", ptr[i]);
+ if (i%8 == 7 || i == len-1)
+ printk("\n");
+ }
+ }
+#endif
+ /* check that the advertised length is correct */
+ if (l3len != skb->len - L2MUX_HDR_SIZE) {
+ printk(KERN_WARNING "L2MUX: l2mux_skb_rx: L3_id:%d - skb length mismatch L3:%d (+4) <> SKB:%d\n",
+ l3pid, l3len, skb->len);
+ goto drop;
+ }
+
+ /* get RX function */
+ rxfn = l2mux_id2rx_tab[l3pid];
+
+ /* Not registered */
+ if (!rxfn)
+ goto drop;
+
+ /* Call the receiver function */
+ return rxfn(skb, dev);
+
+drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+EXPORT_SYMBOL(l2mux_skb_rx);
+
+int l2mux_skb_tx(struct sk_buff *skb, struct net_device *dev)
+{
+ l2mux_skb_fn *txfn;
+ unsigned type;
+
+ /* Packet type ETH_P_XXX */
+ type = ntohs(skb->protocol);
+
+#ifdef CONFIG_MHI_DUMP_FRAMES
+ {
+ u8 *ptr = skb->data;
+ int len = skb_headlen(skb);
+ int i;
+
+ printk(KERN_DEBUG "L2MUX: TX dev:%d skb_len:%d ETH_P:%d\n",
+ dev->ifindex, skb->len, type);
+
+ for (i = 0; i < len; i++) {
+ if (i%8 == 0)
+ printk(KERN_DEBUG "L2MUX: TX [%04X] ", i);
+ printk(" 0x%02X", ptr[i]);
+ if (i%8 == 7 || i == len-1)
+ printk("\n");
+ }
+ }
+#endif
+ /* Only handling non DIX types */
+ if (type <= 0 || type >= ETH_NON_DIX_NPROTO)
+ return -EINVAL;
+
+ /* TX function for this packet type */
+ txfn = l2mux_pt2tx_tab[type];
+
+ if (txfn)
+ return txfn(skb, dev);
+
+ return 0;
+}
+EXPORT_SYMBOL(l2mux_skb_tx);
+
+static int __init l2mux_init(void)
+{
+ int i;
+
+ DPRINTK("l2mux_init\n");
+
+ for (i = 0; i < MHI_L3_NPROTO; i++)
+ l2mux_id2rx_tab[i] = NULL;
+
+ for (i = 0; i < ETH_NON_DIX_NPROTO; i++)
+ l2mux_pt2tx_tab[i] = NULL;
+
+ return 0;
+}
+
+static void __exit l2mux_exit(void)
+{
+ DPRINTK("l2mux_exit\n");
+}
+
+module_init(l2mux_init);
+module_exit(l2mux_exit);
+
+MODULE_AUTHOR("Petri Mattila <petri.to.mattila@renesasmobile.com>");
+MODULE_DESCRIPTION("L2MUX for MHI Protocol Stack");
+MODULE_LICENSE("GPL");
+
diff --git a/net/mhi/l3mhdp.c b/net/mhi/l3mhdp.c
new file mode 100644
index 000000000000..b9e641e4b060
--- /dev/null
+++ b/net/mhi/l3mhdp.c
@@ -0,0 +1,993 @@
+/*
+ * File: l3mhdp.c
+ *
+ * MHDP - Modem Host Data Protocol for MHI protocol family.
+ *
+ * Copyright (C) 2011 Renesas Mobile Corporation. All rights reserved.
+ *
+ * Author: Sugnan Prabhu S <sugnan.prabhu@renesasmobile.com>
+ * Petri Mattila <petri.to.mattila@renesasmobile.com>
+ *
+ * Based on work by: Sam Lantinga (slouken@cs.ucdavis.edu)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/l2mux.h>
+#include <linux/etherdevice.h>
+#include <linux/pkt_sched.h>
+
+#ifdef CONFIG_MHDP_BONDING_SUPPORT
+#define MHDP_BONDING_SUPPORT
+#endif
+
+/*#define MHDP_USE_NAPI*/
+
+#ifdef MHDP_BONDING_SUPPORT
+#include <linux/etherdevice.h>
+#endif /* MHDP_BONDING_SUPPORT */
+
+#include <net/netns/generic.h>
+#include <net/mhi/mhdp.h>
+
+
+/* MHDP device MTU limits */
+#define MHDP_MTU_MAX 0x2400
+#define MHDP_MTU_MIN 0x44
+#define MAX_MHDP_FRAME_SIZE 16000
+
+/* MHDP device names */
+#define MHDP_IFNAME "rmnet%d"
+#define MHDP_CTL_IFNAME "rmnetctl"
+
+/* Print every MHDP SKB content */
+/*#define MHDP_DEBUG_SKB*/
+
+
+#define EPRINTK(...) printk(KERN_DEBUG "MHI/MHDP: " __VA_ARGS__)
+
+#ifdef CONFIG_MHI_DEBUG
+# define DPRINTK(...) printk(KERN_DEBUG "MHI/MHDP: " __VA_ARGS__)
+#else
+# define DPRINTK(...)
+#endif
+
+#ifdef MHDP_DEBUG_SKB
+# define SKBPRINT(a, b) __print_skb_content(a, b)
+#else
+# define SKBPRINT(a, b)
+#endif
+
+/* IPv6 support */
+#define VER_IPv4 0x04
+#define VER_IPv6 0x06
+#define ETH_IP_TYPE(x) (((0x00|(x>>4)) == VER_IPv4) ? ETH_P_IP : ETH_P_IPV6)
+
+int sysctl_mhdp_concat_nb_pkt __read_mostly;
+EXPORT_SYMBOL(sysctl_mhdp_concat_nb_pkt);
+
+/*** Type definitions ***/
+#ifdef MHDP_USE_NAPI
+#define NAPI_WEIGHT 64
+#endif /*MHDP_USE_NAPI*/
+
+#define MAX_MHDPHDR_SIZE 18
+
+struct mhdp_tunnel {
+ struct mhdp_tunnel *next;
+ struct net_device *dev;
+ struct net_device *master_dev;
+ struct sk_buff *skb;
+ int pdn_id;
+ int free_pdn;
+ struct timer_list tx_timer;
+ struct sk_buff *skb_to_free[MAX_MHDPHDR_SIZE];
+ spinlock_t timer_lock;
+};
+
+struct mhdp_net {
+ struct mhdp_tunnel *tunnels;
+ struct net_device *ctl_dev;
+#ifdef MHDP_USE_NAPI
+ struct net_device *dev;
+ struct napi_struct napi;
+ struct sk_buff_head skb_list;
+#endif /*#ifdef MHDP_USE_NAPI*/
+};
+
+struct packet_info {
+ uint32_t pdn_id;
+ uint32_t packet_offset;
+ uint32_t packet_length;
+};
+
+struct mhdp_hdr {
+ uint32_t packet_count;
+ struct packet_info info[MAX_MHDPHDR_SIZE];
+};
+
+
+/*** Prototypes ***/
+
+static void mhdp_netdev_setup(struct net_device *dev);
+
+static void mhdp_submit_queued_skb(struct mhdp_tunnel *tunnel, int force_send);
+
+static int mhdp_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr);
+
+static void tx_timer_timeout(unsigned long arg);
+
+#ifdef MHDP_USE_NAPI
+
+static int mhdp_poll(struct napi_struct *napi, int budget);
+
+#endif /*MHDP_USE_NAPI*/
+
+/*** Global Variables ***/
+
+static int mhdp_net_id __read_mostly;
+
+static struct notifier_block mhdp_netdev_notifier = {
+ .notifier_call = mhdp_netdev_event,
+};
+
+/*** Functions ***/
+
+#ifdef MHDP_DEBUG_SKB
+static void
+__print_skb_content(struct sk_buff *skb, const char *tag)
+{
+ struct page *page;
+ skb_frag_t *frag;
+ int len;
+ int i, j;
+ u8 *ptr;
+
+ /* Main SKB buffer */
+ ptr = (u8 *)skb->data;
+ len = skb_headlen(skb);
+
+ printk(KERN_DEBUG "MHDP: SKB buffer lenght %02u\n", len);
+ for (i = 0; i < len; i++) {
+ if (i%8 == 0)
+ printk(KERN_DEBUG "%s DATA: ", tag);
+ printk(" 0x%02X", ptr[i]);
+ if (i%8 == 7 || i == len - 1)
+ printk("\n");
+ }
+
+ /* SKB fragments */
+ for (i = 0; i < (skb_shinfo(skb)->nr_frags); i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+ page = skb_frag_page(frag);
+
+ ptr = page_address(page);
+
+ for (j = 0; j < frag->size; j++) {
+ if (j%8 == 0)
+ printk(KERN_DEBUG "%s FRAG[%d]: ", tag, i);
+ printk(" 0x%02X", ptr[frag->page_offset + j]);
+ if (j%8 == 7 || j == frag->size - 1)
+ printk("\n");
+ }
+ }
+}
+#endif
+
+
+static inline struct mhdp_net *
+mhdp_net_dev(struct net_device *dev)
+{
+ return net_generic(dev_net(dev), mhdp_net_id);
+}
+
+static void
+mhdp_tunnel_init(struct net_device *dev,
+ struct mhdp_tunnel_parm *parms,
+ struct net_device *master_dev)
+{
+ struct mhdp_net *mhdpn = mhdp_net_dev(dev);
+ struct mhdp_tunnel *tunnel = netdev_priv(dev);
+
+ DPRINTK("mhdp_tunnel_init: dev:%s", dev->name);
+
+ tunnel->next = mhdpn->tunnels;
+ mhdpn->tunnels = tunnel;
+
+ tunnel->dev = dev;
+ tunnel->master_dev = master_dev;
+ tunnel->skb = NULL;
+ tunnel->pdn_id = parms->pdn_id;
+ tunnel->free_pdn = 0;
+
+ init_timer(&tunnel->tx_timer);
+ spin_lock_init(&tunnel->timer_lock);
+}
+
+static void
+mhdp_tunnel_destroy(struct net_device *dev)
+{
+ DPRINTK("mhdp_tunnel_destroy: dev:%s", dev->name);
+
+ unregister_netdevice(dev);
+}
+
+static void
+mhdp_destroy_tunnels(struct mhdp_net *mhdpn)
+{
+ struct mhdp_tunnel *tunnel;
+
+ for (tunnel = mhdpn->tunnels; (tunnel); tunnel = tunnel->next)
+ mhdp_tunnel_destroy(tunnel->dev);
+
+ mhdpn->tunnels = NULL;
+}
+
+static struct mhdp_tunnel *
+mhdp_locate_tunnel(struct mhdp_net *mhdpn, int pdn_id)
+{
+ struct mhdp_tunnel *tunnel;
+
+ for (tunnel = mhdpn->tunnels; tunnel; tunnel = tunnel->next)
+ if (tunnel->pdn_id == pdn_id)
+ return tunnel;
+
+ return NULL;
+}
+
+static struct net_device *
+mhdp_add_tunnel(struct net *net, struct mhdp_tunnel_parm *parms)
+{
+ struct net_device *mhdp_dev, *master_dev;
+
+ DPRINTK("mhdp_add_tunnel: adding a tunnel to %s\n", parms->master);
+
+ master_dev = dev_get_by_name(net, parms->master);
+ if (!master_dev)
+ goto err_alloc_dev;
+
+ mhdp_dev = alloc_netdev(sizeof(struct mhdp_tunnel),
+ MHDP_IFNAME, mhdp_netdev_setup);
+ if (!mhdp_dev)
+ goto err_alloc_dev;
+
+ dev_net_set(mhdp_dev, net);
+
+ if (dev_alloc_name(mhdp_dev, MHDP_IFNAME) < 0)
+ goto err_reg_dev;
+
+ strcpy(parms->name, mhdp_dev->name);
+
+ if (register_netdevice(mhdp_dev)) {
+ printk(KERN_ERR "MHDP: register_netdev failed\n");
+ goto err_reg_dev;
+ }
+
+ dev_hold(mhdp_dev);
+
+ mhdp_tunnel_init(mhdp_dev, parms, master_dev);
+
+ dev_put(master_dev);
+
+ return mhdp_dev;
+
+err_reg_dev:
+ free_netdev(mhdp_dev);
+err_alloc_dev:
+ return NULL;
+}
+
+
+static int
+mhdp_netdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct net *net = dev_net(dev);
+ struct mhdp_net *mhdpn = mhdp_net_dev(dev);
+ struct mhdp_tunnel *tunnel, *pre_dev;
+ struct mhdp_tunnel_parm __user *u_parms;
+ struct mhdp_tunnel_parm k_parms;
+
+ int err = 0;
+
+ DPRINTK("mhdp tunnel ioctl %X", cmd);
+
+ switch (cmd) {
+
+ case SIOCADDPDNID:
+ u_parms = (struct mhdp_tunnel_parm *)ifr->ifr_data;
+ if (copy_from_user(&k_parms, u_parms,
+ sizeof(struct mhdp_tunnel_parm))) {
+ DPRINTK("Error: Failed to copy data from user space");
+ return -EFAULT;
+ }
+
+ DPRINTK("pdn_id:%d master_device:%s",
+ k_parms.pdn_id,
+ k_parms.master);
+ tunnel = mhdp_locate_tunnel(mhdpn, k_parms.pdn_id);
+
+ if (NULL == tunnel) {
+ if (mhdp_add_tunnel(net, &k_parms)) {
+ if (copy_to_user(u_parms, &k_parms,
+ sizeof(struct mhdp_tunnel_parm)))
+ err = -EINVAL;
+ } else {
+ err = -EINVAL;
+ }
+
+ } else if (1 == tunnel->free_pdn) {
+
+ tunnel->free_pdn = 0;
+
+ strcpy((char *) &k_parms.name, tunnel->dev->name);
+
+ if (copy_to_user(u_parms, &k_parms,
+ sizeof(struct mhdp_tunnel_parm)))
+ err = -EINVAL;
+ } else {
+ err = -EBUSY;
+ }
+ break;
+
+ case SIOCDELPDNID:
+ u_parms = (struct mhdp_tunnel_parm *)ifr->ifr_data;
+ if (copy_from_user(&k_parms, u_parms,
+ sizeof(struct mhdp_tunnel_parm))) {
+ DPRINTK("Error: Failed to copy data from user space");
+ return -EFAULT;
+ }
+
+ DPRINTK("pdn_id:%d", k_parms.pdn_id);
+
+ for (tunnel = mhdpn->tunnels, pre_dev = NULL;
+ tunnel;
+ pre_dev = tunnel, tunnel = tunnel->next) {
+ if (tunnel->pdn_id == k_parms.pdn_id)
+ tunnel->free_pdn = 1;
+ }
+ break;
+
+ case SIOCRESETMHDP:
+ mhdp_destroy_tunnels(mhdpn);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int
+mhdp_netdev_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (new_mtu < MHDP_MTU_MIN || new_mtu > MHDP_MTU_MAX)
+ return -EINVAL;
+
+ dev->mtu = new_mtu;
+
+ return 0;
+}
+
+static void
+mhdp_netdev_uninit(struct net_device *dev)
+{
+ dev_put(dev);
+}
+
+
+static void
+mhdp_submit_queued_skb(struct mhdp_tunnel *tunnel, int force_send)
+{
+ struct sk_buff *skb = tunnel->skb;
+ struct l2muxhdr *l2hdr;
+ struct mhdp_hdr *mhdpHdr;
+ int i, nb_frags;
+
+ BUG_ON(!tunnel->master_dev);
+
+ if (skb) {
+ mhdpHdr = (struct mhdp_hdr *)tunnel->skb->data;
+ nb_frags = mhdpHdr->packet_count;
+
+ skb->protocol = htons(ETH_P_MHDP);
+ skb->priority = 1;
+
+ skb->dev = tunnel->master_dev;
+
+ skb_reset_network_header(skb);
+
+ skb_push(skb, L2MUX_HDR_SIZE);
+ skb_reset_mac_header(skb);
+
+ l2hdr = l2mux_hdr(skb);
+ l2mux_set_proto(l2hdr, MHI_L3_MHDP_UL);
+ l2mux_set_length(l2hdr, skb->len - L2MUX_HDR_SIZE);
+
+ SKBPRINT(skb, "MHDP: TX");
+
+ tunnel->dev->stats.tx_packets++;
+ tunnel->skb = NULL;
+
+ dev_queue_xmit(skb);
+
+ for (i = 0; i < nb_frags; i++)
+ if (tunnel->skb_to_free[i])
+ dev_kfree_skb(tunnel->skb_to_free[i]);
+ else
+ EPRINTK("submit_q_skb: error no skb to free\n");
+ }
+}
+
+static int
+mhdp_netdev_rx(struct sk_buff *skb, struct net_device *dev)
+{
+ skb_frag_t *frag = NULL;
+ struct page *page = NULL;
+ struct sk_buff *newskb;
+ struct mhdp_hdr *mhdpHdr;
+ int offset, length;
+ int err = 0, i, pdn_id;
+ int mhdp_header_len;
+ struct mhdp_tunnel *tunnel = NULL;
+ int start = 0;
+ int has_frag = skb_shinfo(skb)->nr_frags;
+ uint32_t packet_count;
+ unsigned char ip_ver;
+
+ if (has_frag) {
+ frag = &skb_shinfo(skb)->frags[0];
+ page = skb_frag_page(frag);
+ }
+
+ if (skb_headlen(skb) > L2MUX_HDR_SIZE)
+ skb_pull(skb, L2MUX_HDR_SIZE);
+ else if (has_frag)
+ frag->page_offset += L2MUX_HDR_SIZE;
+
+ packet_count = *((unsigned char *)skb->data);
+
+ mhdp_header_len = sizeof(packet_count) +
+ (packet_count * sizeof(struct packet_info));
+
+ if (mhdp_header_len > skb_headlen(skb)) {
+ int skbheadlen = skb_headlen(skb);
+
+ DPRINTK("mhdp header length: %d, skb_headerlen: %d",
+ mhdp_header_len, skbheadlen);
+
+ mhdpHdr = kmalloc(mhdp_header_len, GFP_ATOMIC);
+ if (mhdpHdr == NULL) {
+ printk(KERN_ERR "%s: kmalloc failed.\n", __func__);
+ /* don't leak the frame on allocation failure */
+ dev_kfree_skb(skb);
+ return err;
+ }
+
+ if (skbheadlen == 0) {
+ memcpy((__u8 *)mhdpHdr, page_address(page) +
+ frag->page_offset,
+ mhdp_header_len);
+
+ } else {
+ memcpy((__u8 *)mhdpHdr, skb->data, skbheadlen);
+
+ memcpy((__u8 *)mhdpHdr + skbheadlen,
+ page_address(page) +
+ frag->page_offset,
+ mhdp_header_len - skbheadlen);
+
+ start = mhdp_header_len - skbheadlen;
+ }
+
+ DPRINTK("page start: %d", start);
+ } else {
+ DPRINTK("skb->data has whole mhdp header");
+ mhdpHdr = (struct mhdp_hdr *)(((__u8 *)skb->data));
+ }
+
+ DPRINTK("MHDP PACKET COUNT : %d", mhdpHdr->packet_count);
+
+ rcu_read_lock();
+
+ for (i = 0; i < mhdpHdr->packet_count; i++) {
+
+ DPRINTK(" packet_info[%d] - PDNID:%d, packet_offset: %d,\
+ packet_length: %d\n", i, mhdpHdr->info[i].pdn_id,
+ mhdpHdr->info[i].packet_offset,
+ mhdpHdr->info[i].packet_length);
+
+ pdn_id = mhdpHdr->info[i].pdn_id;
+ offset = mhdpHdr->info[i].packet_offset;
+ length = mhdpHdr->info[i].packet_length;
+
+ if (skb_headlen(skb) > (mhdp_header_len + offset)) {
+
+ newskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!newskb))
+ goto error;
+
+ skb_pull(newskb, mhdp_header_len + offset);
+ ip_ver = (u8)*newskb->data;
+
+ } else if (has_frag) {
+
+ newskb = netdev_alloc_skb(dev, skb_headlen(skb));
+
+ if (unlikely(!newskb))
+ goto error;
+
+ get_page(page);
+ skb_add_rx_frag(newskb,
+ skb_shinfo(newskb)->nr_frags,
+ page,
+ frag->page_offset +
+ ((mhdp_header_len - skb_headlen(skb)) + offset),
+ length,
+ PAGE_SIZE);
+
+ ip_ver = *((unsigned char *)page_address(page) +
+ frag->page_offset +
+ ((mhdp_header_len - skb_headlen(skb)) +
+ offset));
+ if ((ip_ver >> 4) != VER_IPv4 &&
+ (ip_ver >> 4) != VER_IPv6)
+ goto error;
+
+ } else {
+ DPRINTK("Error in the data received");
+ goto error;
+ }
+
+ skb_reset_network_header(newskb);
+
+ /* Check the IP version and set skb->protocol to
+ * ETH_P_IP or ETH_P_IPV6 for received packets. */
+
+ newskb->protocol = htons(ETH_IP_TYPE(ip_ver));
+
+ newskb->pkt_type = PACKET_HOST;
+
+ skb_tunnel_rx(newskb, dev);
+
+ tunnel = mhdp_locate_tunnel(mhdp_net_dev(dev), pdn_id);
+ if (tunnel) {
+ struct net_device_stats *stats = &tunnel->dev->stats;
+ stats->rx_packets++;
+ newskb->dev = tunnel->dev;
+ SKBPRINT(newskb, "NEWSKB: RX");
+#ifdef MHDP_USE_NAPI
+ netif_receive_skb(newskb);
+#else
+ netif_rx(newskb);
+#endif /*#ifdef MHDP_USE_NAPI*/
+ }
+ }
+ rcu_read_unlock();
+
+ if (mhdp_header_len > skb_headlen(skb))
+ kfree(mhdpHdr);
+
+ dev_kfree_skb(skb);
+
+ return err;
+
+error:
+ if (mhdp_header_len > skb_headlen(skb))
+ kfree(mhdpHdr);
+
+ dev_kfree_skb(skb);
+
+ return err;
+}
+
+#ifdef MHDP_USE_NAPI
+/*
+ * mhdp_poll() - NAPI poll callback; drains up to @budget queued MHDP
+ * frames and hands each one to mhdp_netdev_rx().
+ */
+static int mhdp_poll(struct napi_struct *napi, int budget)
+{
+ struct mhdp_net *mhdpn = container_of(napi, struct mhdp_net, napi);
+ struct sk_buff *skb;
+ int work_done = 0;
+
+ while (work_done < budget &&
+ (skb = skb_dequeue(&mhdpn->skb_list)) != NULL) {
+ mhdp_netdev_rx(skb, mhdpn->dev);
+ work_done++;
+ }
+
+ if (work_done < budget)
+ napi_complete(napi);
+
+ return work_done;
+}
+/*l2mux callback*/
+static int
+mhdp_netdev_rx_napi(struct sk_buff *skb, struct net_device *dev)
+{
+ struct mhdp_net *mhdpn = mhdp_net_dev(dev);
+
+
+ if (mhdpn) {
+
+ mhdpn->dev = dev;
+ skb_queue_tail(&mhdpn->skb_list, skb);
+
+ napi_schedule(&mhdpn->napi);
+
+ } else {
+ EPRINTK("mhdp_netdev_rx_napi-MHDP driver init not correct\n");
+ }
+
+ return 0;
+}
+#endif /*MHDP_USE_NAPI*/
+
+static void tx_timer_timeout(unsigned long arg)
+{
+ struct mhdp_tunnel *tunnel = (struct mhdp_tunnel *) arg;
+
+ spin_lock(&tunnel->timer_lock);
+
+ mhdp_submit_queued_skb(tunnel, 1);
+
+ spin_unlock(&tunnel->timer_lock);
+}
+
+static int
+mhdp_netdev_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct mhdp_hdr *mhdpHdr;
+ struct mhdp_tunnel *tunnel = netdev_priv(dev);
+ struct net_device_stats *stats = &tunnel->dev->stats;
+ struct page *page = NULL;
+ int i;
+ int packet_count, offset, len;
+
+ spin_lock(&tunnel->timer_lock);
+
+ SKBPRINT(skb, "SKB: TX");
+
+ if (timer_pending(&tunnel->tx_timer))
+ del_timer(&tunnel->tx_timer);
+
+#if 0
+ {
+ int i;
+ int len = skb->len;
+ u8 *ptr = skb->data;
+
+ for (i = 0; i < len; i++) {
+ if (i%8 == 0)
+ printk(KERN_DEBUG
+ "MHDP mhdp_netdev_xmit: TX [%04X] ", i);
+ printk(" 0x%02X", ptr[i]);
+ if (i%8 == 7 || i == len-1)
+ printk("\n");
+ }
+ }
+#endif
+xmit_again:
+
+ if (tunnel->skb == NULL) {
+ tunnel->skb = netdev_alloc_skb(dev,
+ L2MUX_HDR_SIZE + sizeof(struct mhdp_hdr));
+
+ if (!tunnel->skb) {
+ EPRINTK("mhdp_netdev_xmit: skb alloc failed, dropping");
+ /* drop the frame instead of crashing on alloc failure */
+ goto tx_error;
+ }
+
+ /* Placeholder for the mhdp packet count */
+ len = skb_headroom(tunnel->skb) - L2MUX_HDR_SIZE - ETH_HLEN;
+
+ skb_push(tunnel->skb, len);
+ len -= 4;
+
+ memset(tunnel->skb->data, 0, len);
+
+ /*
+ * Need to replace following logic, with something better like
+ * __pskb_pull_tail or pskb_may_pull(tunnel->skb, len);
+ */
+ {
+ tunnel->skb->tail -= len;
+ tunnel->skb->len -= len;
+ }
+
+
+ mhdpHdr = (struct mhdp_hdr *)tunnel->skb->data;
+ mhdpHdr->packet_count = 0;
+ }
+
+ /* This new frame is too big for the current mhdp
+ * frame, send the queued frame first */
+ if (tunnel->skb->len + skb->len >= MAX_MHDP_FRAME_SIZE) {
+
+ mhdp_submit_queued_skb(tunnel, 1);
+
+ goto xmit_again;
+
+ } else {
+
+ /*
+ * skb_put() cannot be used here because data_len != 0
+ * (the skb already carries paged fragments).
+ */
+ tunnel->skb->tail += sizeof(struct packet_info);
+ tunnel->skb->len += sizeof(struct packet_info);
+
+ DPRINTK("new - skb->tail:%lu skb->end:%lu skb->data_len:%lu",
+ (unsigned long)tunnel->skb->tail,
+ (unsigned long)tunnel->skb->end,
+ (unsigned long)tunnel->skb->data_len);
+
+ mhdpHdr = (struct mhdp_hdr *)tunnel->skb->data;
+
+ tunnel->skb_to_free[mhdpHdr->packet_count] = skb;
+
+ packet_count = mhdpHdr->packet_count;
+ mhdpHdr->info[packet_count].pdn_id = tunnel->pdn_id;
+ if (packet_count == 0) {
+ mhdpHdr->info[packet_count].packet_offset = 0;
+ } else {
+ mhdpHdr->info[packet_count].packet_offset =
+ mhdpHdr->info[packet_count - 1].packet_offset +
+ mhdpHdr->info[packet_count - 1].packet_length;
+ }
+
+ mhdpHdr->info[packet_count].packet_length = skb->len;
+ mhdpHdr->packet_count++;
+
+ page = virt_to_page(skb->data);
+
+ if (page == NULL) {
+ EPRINTK("virt_to_page returned NULL");
+ goto tx_error;
+ }
+
+ get_page(page);
+
+ offset = ((unsigned long)skb->data -
+ (unsigned long)page_address(page));
+
+ skb_add_rx_frag(tunnel->skb, skb_shinfo(tunnel->skb)->nr_frags,
+ page, offset, skb_headlen(skb), PAGE_SIZE);
+
+ if (skb_shinfo(skb)->nr_frags) {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag =
+ &skb_shinfo(skb)->frags[i];
+
+ get_page(skb_frag_page(frag));
+ skb_add_rx_frag(tunnel->skb,
+ skb_shinfo(tunnel->skb)->nr_frags,
+ skb_frag_page(frag),
+ frag->page_offset,
+ frag->size,
+ PAGE_SIZE);
+ }
+ }
+
+ if (mhdpHdr->packet_count >= MAX_MHDPHDR_SIZE) {
+ mhdp_submit_queued_skb(tunnel, 1);
+ } else {
+ tunnel->tx_timer.function = &tx_timer_timeout;
+ tunnel->tx_timer.data = (unsigned long) tunnel;
+ tunnel->tx_timer.expires =
+ jiffies + ((HZ + 999) / 1000);
+ add_timer(&tunnel->tx_timer);
+ }
+ }
+
+ spin_unlock(&tunnel->timer_lock);
+ return NETDEV_TX_OK;
+
+tx_error:
+ spin_unlock(&tunnel->timer_lock);
+ stats->tx_errors++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+
+static int
+mhdp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct net_device *event_dev = (struct net_device *)ptr;
+
+ DPRINTK("event_dev: %s, event: %lx\n",
+ event_dev ? event_dev->name : "None", event);
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ {
+ struct mhdp_net *mhdpn = mhdp_net_dev(event_dev);
+ struct mhdp_tunnel *iter, *prev;
+
+ DPRINTK("event_dev: %s, event: %lx\n",
+ event_dev ? event_dev->name : "None", event);
+
+ for (iter = mhdpn->tunnels, prev = NULL;
+ iter; prev = iter, iter = iter->next) {
+ if (event_dev == iter->master_dev) {
+ if (!prev)
+ mhdpn->tunnels =
+ mhdpn->tunnels->next;
+ else
+ prev->next = iter->next;
+ mhdp_tunnel_destroy(iter->dev);
+ }
+ }
+ }
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+#ifdef MHDP_BONDING_SUPPORT
+static void
+cdma_netdev_uninit(struct net_device *dev)
+{
+ dev_put(dev);
+}
+
+static void mhdp_ethtool_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strncpy(drvinfo->driver, dev->name, 32);
+}
+
+static const struct ethtool_ops mhdp_ethtool_ops = {
+ .get_drvinfo = mhdp_ethtool_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+};
+#endif /* MHDP_BONDING_SUPPORT */
+
+static const struct net_device_ops mhdp_netdev_ops = {
+ .ndo_uninit = mhdp_netdev_uninit,
+ .ndo_start_xmit = mhdp_netdev_xmit,
+ .ndo_do_ioctl = mhdp_netdev_ioctl,
+ .ndo_change_mtu = mhdp_netdev_change_mtu,
+};
+
+static void mhdp_netdev_setup(struct net_device *dev)
+{
+ dev->netdev_ops = &mhdp_netdev_ops;
+#ifdef MHDP_BONDING_SUPPORT
+ dev->ethtool_ops = &mhdp_ethtool_ops;
+#endif /* MHDP_BONDING_SUPPORT */
+
+ dev->destructor = free_netdev;
+
+#if 0 /* MHDP_BONDING_SUPPORT: ether_setup() variant, intentionally disabled */
+ ether_setup(dev);
+ dev->flags |= IFF_NOARP;
+ dev->iflink = 0;
+ dev->features |= (NETIF_F_NETNS_LOCAL | NETIF_F_FRAGLIST);
+#else
+ dev->type = ARPHRD_TUNNEL;
+ dev->hard_header_len = L2MUX_HDR_SIZE + sizeof(struct mhdp_hdr);
+ dev->mtu = ETH_DATA_LEN;
+ dev->flags = IFF_NOARP;
+ dev->iflink = 0;
+ dev->addr_len = 4;
+ dev->features |= (NETIF_F_NETNS_LOCAL | NETIF_F_FRAGLIST);
+#endif /* MHDP_BONDING_SUPPORT */
+}
+
+static int __net_init mhdp_init_net(struct net *net)
+{
+ struct mhdp_net *mhdpn = net_generic(net, mhdp_net_id);
+ int err;
+
+ mhdpn->tunnels = NULL;
+
+ mhdpn->ctl_dev = alloc_netdev(sizeof(struct mhdp_tunnel),
+ MHDP_CTL_IFNAME,
+ mhdp_netdev_setup);
+ if (!mhdpn->ctl_dev)
+ return -ENOMEM;
+
+ dev_net_set(mhdpn->ctl_dev, net);
+ dev_hold(mhdpn->ctl_dev);
+
+ err = register_netdev(mhdpn->ctl_dev);
+ if (err) {
+ printk(KERN_ERR MHDP_CTL_IFNAME " register failed");
+ free_netdev(mhdpn->ctl_dev);
+ return err;
+ }
+
+#ifdef MHDP_USE_NAPI
+ netif_napi_add(mhdpn->ctl_dev, &mhdpn->napi, mhdp_poll, NAPI_WEIGHT);
+ napi_enable(&mhdpn->napi);
+ skb_queue_head_init(&mhdpn->skb_list);
+#endif /*#ifdef MHDP_USE_NAPI*/
+ return 0;
+}
+
+static void __net_exit mhdp_exit_net(struct net *net)
+{
+ struct mhdp_net *mhdpn = net_generic(net, mhdp_net_id);
+
+ rtnl_lock();
+ mhdp_destroy_tunnels(mhdpn);
+ unregister_netdevice(mhdpn->ctl_dev);
+ rtnl_unlock();
+}
+
+static struct pernet_operations mhdp_net_ops = {
+ .init = mhdp_init_net,
+ .exit = mhdp_exit_net,
+ .id = &mhdp_net_id,
+ .size = sizeof(struct mhdp_net),
+};
+
+
+static int __init mhdp_init(void)
+{
+ int err;
+
+#ifdef MHDP_USE_NAPI
+ err = l2mux_netif_rx_register(MHI_L3_MHDP_DL, mhdp_netdev_rx_napi);
+#else
+ err = l2mux_netif_rx_register(MHI_L3_MHDP_DL, mhdp_netdev_rx);
+#endif /* MHDP_USE_NAPI */
+ if (err)
+ goto rollback0;
+
+ err = register_pernet_device(&mhdp_net_ops);
+ if (err < 0)
+ goto rollback1;
+
+ err = register_netdevice_notifier(&mhdp_netdev_notifier);
+ if (err < 0)
+ goto rollback2;
+
+ return 0;
+
+rollback2:
+ unregister_pernet_device(&mhdp_net_ops);
+rollback1:
+ l2mux_netif_rx_unregister(MHI_L3_MHDP_DL);
+rollback0:
+ return err;
+}
+
+static void __exit mhdp_exit(void)
+{
+ l2mux_netif_rx_unregister(MHI_L3_MHDP_DL);
+ unregister_netdevice_notifier(&mhdp_netdev_notifier);
+ unregister_pernet_device(&mhdp_net_ops);
+}
+
+
+module_init(mhdp_init);
+module_exit(mhdp_exit);
+
+MODULE_AUTHOR("Sugnan Prabhu S <sugnan.prabhu@renesasmobile.com>");
+MODULE_DESCRIPTION("Modem Host Data Protocol for MHI");
+MODULE_LICENSE("GPL");
+
diff --git a/net/mhi/l3mhi.c b/net/mhi/l3mhi.c
new file mode 100644
index 000000000000..086bad017bad
--- /dev/null
+++ b/net/mhi/l3mhi.c
@@ -0,0 +1,132 @@
+/*
+ * File: l3mhi.c
+ *
+ * L2 channels to AF_MHI binding.
+ *
+ * Copyright (C) 2011 Renesas Mobile Corporation. All rights reserved.
+ *
+ * Author: Petri To Mattila <petri.to.mattila@renesasmobile.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/mhi.h>
+#include <linux/l2mux.h>
+
+#include <net/af_mhi.h>
+#include <net/mhi/sock.h>
+#include <net/mhi/dgram.h>
+
+#define MAX_CHANNELS 256
+
+#ifdef CONFIG_MHI_DEBUG
+# define DPRINTK(...) printk(KERN_DEBUG "L3MHI: " __VA_ARGS__)
+#else
+# define DPRINTK(...)
+#endif
+
+
+/* Module parameters - with defaults */
+static int l2chs[MAX_CHANNELS] = {
+ MHI_L3_FILE,
+ MHI_L3_XFILE,
+ MHI_L3_SECURITY,
+ MHI_L3_TEST,
+ MHI_L3_TEST_PRIO,
+ MHI_L3_THERMAL,
+ MHI_L3_HIGH_PRIO_TEST,
+ MHI_L3_MED_PRIO_TEST,
+ MHI_L3_LOW_PRIO_TEST
+};
+static int l2cnt = 9;
+
+
+
+/* Functions */
+
+static int
+mhi_netif_rx(struct sk_buff *skb, struct net_device *dev)
+{
+ skb->protocol = htons(ETH_P_MHI);
+
+ return netif_rx(skb);
+}
+
+
+/* Module registration */
+
+int __init l3mhi_init(void)
+{
+ int ch, i;
+ int err;
+
+ for (i = 0; i < l2cnt; i++) {
+ ch = l2chs[i];
+ if (ch >= 0 && ch < MHI_L3_NPROTO) {
+ err = l2mux_netif_rx_register(ch, mhi_netif_rx);
+ if (err)
+ goto error;
+
+ err = mhi_register_protocol(ch);
+ if (err)
+ goto error;
+ }
+ }
+
+ return 0;
+
+error:
+ for (i = 0; i < l2cnt; i++) {
+ ch = l2chs[i];
+ if (ch >= 0 && ch < MHI_L3_NPROTO) {
+ if (mhi_protocol_registered(ch)) {
+ l2mux_netif_rx_unregister(ch);
+ mhi_unregister_protocol(ch);
+ }
+ }
+ }
+
+ return err;
+}
+
+void __exit l3mhi_exit(void)
+{
+ int ch, i;
+
+ for (i = 0; i < l2cnt; i++) {
+ ch = l2chs[i];
+ if (ch >= 0 && ch < MHI_L3_NPROTO) {
+ if (mhi_protocol_registered(ch)) {
+ l2mux_netif_rx_unregister(ch);
+ mhi_unregister_protocol(ch);
+ }
+ }
+ }
+}
+
+
+module_init(l3mhi_init);
+module_exit(l3mhi_exit);
+
+module_param_array_named(l2_channels, l2chs, int, &l2cnt, 0444);
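+
+/*
+ * The default channel set above can be overridden at module load time,
+ * e.g. (module name assumed to be l3mhi):
+ *
+ *   modprobe l3mhi l2_channels=<ch0>,<ch1>,...
+ */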
+
+MODULE_AUTHOR("Petri Mattila <petri.to.mattila@renesasmobile.com>");
+MODULE_DESCRIPTION("L3 MHI Binding");
+MODULE_LICENSE("GPL");
+
diff --git a/net/mhi/l3phonet.c b/net/mhi/l3phonet.c
new file mode 100644
index 000000000000..8de7471296db
--- /dev/null
+++ b/net/mhi/l3phonet.c
@@ -0,0 +1,118 @@
+/*
+ * File: l3phonet.c
+ *
+ * L2 PHONET channel to AF_PHONET binding.
+ *
+ * Copyright (C) 2011 Renesas Mobile Corporation. All rights reserved.
+ *
+ * Author: Petri To Mattila <petri.to.mattila@renesasmobile.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/mhi.h>
+#include <linux/l2mux.h>
+
+
+/* Functions */
+
+static int
+mhi_pn_netif_rx(struct sk_buff *skb, struct net_device *dev)
+{
+ /* Set Protocol Family */
+ skb->protocol = htons(ETH_P_PHONET);
+
+ /* Remove L2MUX header and Phonet media byte */
+ skb_pull(skb, L2MUX_HDR_SIZE + 1);
+
+ /* Pass upwards to the Protocol Family */
+ return netif_rx(skb);
+}
+
+static int
+mhi_pn_netif_tx(struct sk_buff *skb, struct net_device *dev)
+{
+ struct l2muxhdr *l2hdr;
+ int l3len;
+ u8 *ptr;
+
+ /* Add media byte */
+ ptr = skb_push(skb, 1);
+
+ /* Set media byte */
+ ptr[0] = dev->dev_addr[0];
+
+ /* L3 length */
+ l3len = skb->len;
+
+ /* Add L2MUX header */
+ skb_push(skb, L2MUX_HDR_SIZE);
+
+ /* Mac header starts here */
+ skb_reset_mac_header(skb);
+
+ /* L2MUX header pointer */
+ l2hdr = l2mux_hdr(skb);
+
+ /* L3 Proto ID */
+ l2mux_set_proto(l2hdr, MHI_L3_PHONET);
+
+ /* L3 payload length */
+ l2mux_set_length(l2hdr, l3len);
+
+ return 0;
+}
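+
+/*
+ * Resulting frame layout, as assembled by mhi_pn_netif_tx() above:
+ *
+ *   +--------------+------------+----------------+
+ *   | L2MUX header | media byte | Phonet payload |
+ *   +--------------+------------+----------------+
+ *
+ * The L2MUX length field (l3len) covers the media byte plus the Phonet
+ * payload; mhi_pn_netif_rx() strips both the header and the media byte
+ * on receive.
+ */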
+
+
+/* Module registration */
+
+int __init mhi_pn_init(void)
+{
+ int err;
+
+ err = l2mux_netif_rx_register(MHI_L3_PHONET, mhi_pn_netif_rx);
+ if (err)
+ goto err1;
+
+ err = l2mux_netif_tx_register(ETH_P_PHONET, mhi_pn_netif_tx);
+ if (err)
+ goto err2;
+
+ return 0;
+
+err2:
+ l2mux_netif_rx_unregister(MHI_L3_PHONET);
+err1:
+ return err;
+}
+
+void __exit mhi_pn_exit(void)
+{
+ l2mux_netif_rx_unregister(MHI_L3_PHONET);
+ l2mux_netif_tx_unregister(ETH_P_PHONET);
+}
+
+
+module_init(mhi_pn_init);
+module_exit(mhi_pn_exit);
+
+MODULE_AUTHOR("Petri Mattila <petri.to.mattila@renesasmobile.com>");
+MODULE_DESCRIPTION("MHI Phonet protocol family bridge");
+MODULE_LICENSE("GPL");
+
diff --git a/net/mhi/mhi_dgram.c b/net/mhi/mhi_dgram.c
new file mode 100644
index 000000000000..b36f9376a923
--- /dev/null
+++ b/net/mhi/mhi_dgram.c
@@ -0,0 +1,330 @@
+/*
+ * File: mhi_dgram.c
+ *
+ * Copyright (C) 2011 Renesas Mobile Corporation. All rights reserved.
+ *
+ * Author: Petri Mattila <petri.to.mattila@renesasmobile.com>
+ *
+ * DGRAM socket implementation for MHI protocol family.
+ *
+ * It uses the MHI socket framework in mhi_socket.c
+ *
+ * This implementation is the most basic frame passing interface.
+ * The user space can use the sendmsg() and recvmsg() system calls
+ * to access the frames. The socket is created with the socket()
+ * system call, e.g. socket(PF_MHI,SOCK_DGRAM,l2proto).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
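+
+/*
+ * Illustrative user-space sketch (not part of this file). It assumes
+ * the uapi headers export PF_MHI, the MHI_L3_* channel ids and struct
+ * sockaddr_mhi with the sa_ifindex field used by mhi_sock_bind(), and
+ * that the chosen channel was registered (see l3mhi.c); "mhi0" is a
+ * hypothetical interface name:
+ *
+ *   int fd = socket(PF_MHI, SOCK_DGRAM, MHI_L3_TEST);
+ *   struct sockaddr_mhi sa = { .sa_ifindex = if_nametoindex("mhi0") };
+ *
+ *   bind(fd, (struct sockaddr *)&sa, sizeof(sa));
+ *   send(fd, frame, frame_len, 0);   sendmsg() adds the L2MUX header
+ *   recv(fd, buf, sizeof(buf), 0);   recvmsg() strips it again
+ */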
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/mhi.h>
+#include <linux/l2mux.h>
+
+#include <asm/ioctls.h>
+
+#include <net/af_mhi.h>
+#include <net/mhi/sock.h>
+#include <net/mhi/dgram.h>
+
+#ifdef CONFIG_MHI_DEBUG
+# define DPRINTK(...) printk(KERN_DEBUG "MHI/DGRAM: " __VA_ARGS__)
+#else
+# define DPRINTK(...)
+#endif
+
+
+/*** Prototypes ***/
+
+static struct proto mhi_dgram_proto;
+
+static void mhi_dgram_destruct(struct sock *sk);
+
+
+/*** Functions ***/
+
+int mhi_dgram_sock_create(
+ struct net *net,
+ struct socket *sock,
+ int proto,
+ int kern)
+{
+ struct sock *sk;
+ struct mhi_sock *msk;
+
+ DPRINTK("mhi_dgram_sock_create: proto:%d type:%d\n",
+ proto, sock->type);
+
+ if (sock->type != SOCK_DGRAM)
+ return -EPROTONOSUPPORT;
+
+ if (proto == MHI_L3_ANY)
+ return -EPROTONOSUPPORT;
+
+ sk = sk_alloc(net, PF_MHI, GFP_KERNEL, &mhi_dgram_proto);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
+
+ sock->ops = &mhi_socket_ops;
+ sock->state = SS_UNCONNECTED;
+
+ sk->sk_protocol = proto;
+ sk->sk_destruct = mhi_dgram_destruct;
+ sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+
+ sk->sk_prot->init(sk);
+
+ msk = mhi_sk(sk);
+
+ msk->sk_l3proto = proto;
+ msk->sk_ifindex = -1;
+
+ return 0;
+}
+
+static int mhi_dgram_init(struct sock *sk)
+{
+ return 0;
+}
+
+static void mhi_dgram_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
+static void mhi_dgram_close(struct sock *sk, long timeout)
+{
+ sk_common_release(sk);
+}
+
+static int mhi_dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+ int err;
+
+ DPRINTK("mhi_dgram_ioctl: cmd:%d arg:%lu\n", cmd, arg);
+
+ switch (cmd) {
+ case SIOCOUTQ:
+ {
+ int len;
+ len = sk_wmem_alloc_get(sk);
+ err = put_user(len, (int __user *)arg);
+ }
+ break;
+
+ case SIOCINQ:
+ {
+ struct sk_buff *skb;
+ int len;
+
+ lock_sock(sk);
+ {
+ skb = skb_peek(&sk->sk_receive_queue);
+ len = skb ? skb->len : 0;
+ }
+ release_sock(sk);
+
+ err = put_user(len, (int __user *)arg);
+ }
+ break;
+
+ default:
+ err = -ENOIOCTLCMD;
+ }
+
+ return err;
+}
+
+static int mhi_dgram_sendmsg(
+ struct kiocb *iocb,
+ struct sock *sk,
+ struct msghdr *msg,
+ size_t len)
+{
+ struct mhi_sock *msk = mhi_sk(sk);
+ struct net_device *dev = NULL;
+ struct l2muxhdr *l2hdr;
+ struct sk_buff *skb;
+ unsigned mflags;
+
+ int err = -EFAULT;
+ mflags = (MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_CMSG_COMPAT);
+
+ if (msg->msg_flags & ~mflags) {
+ printk(KERN_WARNING "%s: incompatible msg_flags: 0x%08X\n",
+ __func__, msg->msg_flags);
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ skb = sock_alloc_send_skb(sk, len + L2MUX_HDR_SIZE + ETH_HLEN,
+ (msg->msg_flags & MSG_DONTWAIT), &err);
+ if (!skb) {
+ printk(KERN_ERR "%s: sock_alloc_send_skb failed: %d\n",
+ __func__, err);
+ goto out;
+ }
+
+ skb_reserve(skb, L2MUX_HDR_SIZE + ETH_HLEN);
+ skb_reset_transport_header(skb);
+
+ err = memcpy_fromiovec((void *)skb_put(skb, len), msg->msg_iov, len);
+ if (err < 0) {
+ printk(KERN_ERR "%s: memcpy_fromiovec failed: %d\n",
+ __func__, err);
+ goto drop;
+ }
+
+ if (msk->sk_ifindex)
+ dev = dev_get_by_index(sock_net(sk), msk->sk_ifindex);
+
+ if (!dev) {
+ printk(KERN_ERR "%s: no device for ifindex:%d\n",
+ __func__, msk->sk_ifindex);
+ goto drop;
+ }
+
+ if (!(dev->flags & IFF_UP)) {
+ printk(KERN_ERR "%s: device %d not IFF_UP\n",
+ __func__, msk->sk_ifindex);
+ err = -ENETDOWN;
+ goto drop;
+ }
+
+ if (len + L2MUX_HDR_SIZE > dev->mtu) {
+ err = -EMSGSIZE;
+ goto drop;
+ }
+
+ skb_reset_network_header(skb);
+
+ skb_push(skb, L2MUX_HDR_SIZE);
+ skb_reset_mac_header(skb);
+
+ l2hdr = l2mux_hdr(skb);
+ l2mux_set_proto(l2hdr, sk->sk_protocol);
+ l2mux_set_length(l2hdr, len);
+
+ err = mhi_skb_send(skb, dev, sk->sk_protocol);
+
+ goto put;
+
+drop:
+ kfree_skb(skb);
+put:
+ if (dev)
+ dev_put(dev);
+out:
+ return err;
+}
+
+static int mhi_dgram_recvmsg(
+ struct kiocb *iocb,
+ struct sock *sk,
+ struct msghdr *msg,
+ size_t len,
+ int noblock,
+ int flags,
+ int *addr_len)
+{
+ struct sk_buff *skb = NULL;
+ int cnt, err;
+ unsigned mflags;
+
+ err = -EOPNOTSUPP;
+ mflags = (MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|MSG_NOSIGNAL|
+ MSG_CMSG_COMPAT);
+
+ if (flags & ~mflags) {
+ printk(KERN_WARNING "%s: incompatible socket flags: 0x%08X",
+ __func__, flags);
+ goto out2;
+ }
+
+ if (addr_len)
+ addr_len[0] = 0;
+
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ goto out2;
+
+ cnt = skb->len - L2MUX_HDR_SIZE;
+ if (len < cnt) {
+ msg->msg_flags |= MSG_TRUNC;
+ cnt = len;
+ }
+
+ err = skb_copy_datagram_iovec(skb, L2MUX_HDR_SIZE, msg->msg_iov, cnt);
+ if (err)
+ goto out;
+
+ if (flags & MSG_TRUNC)
+ err = skb->len - L2MUX_HDR_SIZE;
+ else
+ err = cnt;
+
+out:
+ skb_free_datagram(sk, skb);
+out2:
+ return err;
+}
+
+static int mhi_dgram_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ return NET_RX_SUCCESS;
+}
+
+
+static struct proto mhi_dgram_proto = {
+ .name = "MHI-DGRAM",
+ .owner = THIS_MODULE,
+ .close = mhi_dgram_close,
+ .ioctl = mhi_dgram_ioctl,
+ .init = mhi_dgram_init,
+ .sendmsg = mhi_dgram_sendmsg,
+ .recvmsg = mhi_dgram_recvmsg,
+ .backlog_rcv = mhi_dgram_backlog_rcv,
+ .hash = mhi_sock_hash,
+ .unhash = mhi_sock_unhash,
+ .obj_size = sizeof(struct mhi_sock),
+};
+
+
+int mhi_dgram_proto_init(void)
+{
+ DPRINTK("mhi_dgram_proto_init\n");
+
+ return proto_register(&mhi_dgram_proto, 1);
+}
+
+void mhi_dgram_proto_exit(void)
+{
+ DPRINTK("mhi_dgram_proto_exit\n");
+
+ proto_unregister(&mhi_dgram_proto);
+}
+
+
diff --git a/net/mhi/mhi_proto.c b/net/mhi/mhi_proto.c
new file mode 100644
index 000000000000..e67fbdce4f19
--- /dev/null
+++ b/net/mhi/mhi_proto.c
@@ -0,0 +1,214 @@
+/*
+ * File: mhi_proto.c
+ *
+ * Copyright (C) 2011 Renesas Mobile Corporation. All rights reserved.
+ *
+ * Author: Petri Mattila <petri.to.mattila@renesasmobile.com>
+ *
+ * Modem-Host Interface (MHI) Protocol Family
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/if_mhi.h>
+#include <linux/mhi.h>
+#include <linux/l2mux.h>
+
+#include <net/af_mhi.h>
+#include <net/mhi/sock.h>
+#include <net/mhi/dgram.h>
+#include <net/mhi/raw.h>
+
+#ifdef CONFIG_MHI_DEBUG
+# define DPRINTK(...) printk(KERN_DEBUG "AF_MHI: " __VA_ARGS__)
+#else
+# define DPRINTK(...)
+#endif
+
+
+/* Supported L2 protocols */
+static __u8 mhi_protocols[MHI_L3_NPROTO] __read_mostly = { 0, };
+
+
+/*** Functions ***/
+
+int
+mhi_protocol_registered(int protocol)
+{
+ if (protocol >= 0 && protocol < MHI_L3_NPROTO)
+ return mhi_protocols[protocol];
+ if (protocol == MHI_L3_ANY)
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(mhi_protocol_registered);
+
+int
+mhi_register_protocol(int protocol)
+{
+ DPRINTK("mhi_register_protocol: %d\n", protocol);
+
+ if (protocol < 0 || protocol >= MHI_L3_NPROTO)
+ return -EINVAL;
+
+ mhi_protocols[protocol] = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(mhi_register_protocol);
+
+int
+mhi_unregister_protocol(int protocol)
+{
+ DPRINTK("mhi_unregister_protocol: %d\n", protocol);
+
+ if (protocol < 0 || protocol >= MHI_L3_NPROTO)
+ return -EINVAL;
+
+ mhi_protocols[protocol] = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL(mhi_unregister_protocol);
+
+
+int
+mhi_skb_send(
+ struct sk_buff *skb,
+ struct net_device *dev,
+ u8 proto)
+{
+ int err = 0;
+
+ DPRINTK("mhi_skb_send: proto:%d skb_len:%d\n", proto, skb->len);
+
+ skb->protocol = htons(ETH_P_MHI);
+ skb->dev = dev;
+
+ if (skb->pkt_type == PACKET_LOOPBACK) {
+ skb_orphan(skb);
+ netif_rx_ni(skb);
+ } else {
+
+ if ((proto == MHI_L3_XFILE) ||
+ (proto == MHI_L3_LOW_PRIO_TEST)) {
+ skb->priority = 1; /* Low prio */
+ } else if ((proto == MHI_L3_AUDIO) ||
+ (proto == MHI_L3_TEST_PRIO) ||
+ (proto == MHI_L3_HIGH_PRIO_TEST)) {
+ skb->priority = 6; /* high prio */
+ } else {
+ skb->priority = 0; /* medium prio */
+ }
+ err = dev_queue_xmit(skb);
+ }
+
+ return err;
+}
+
+int
+mhi_skb_recv(
+ struct sk_buff *skb,
+ struct net_device *dev,
+ struct packet_type *type,
+ struct net_device *orig_dev)
+{
+ struct l2muxhdr *l2hdr;
+
+ u8 l3pid;
+ u32 l3len;
+ int err;
+
+ l2hdr = l2mux_hdr(skb);
+
+ l3pid = l2mux_get_proto(l2hdr);
+ l3len = l2mux_get_length(l2hdr);
+
+ DPRINTK("mhi_skb_recv: skb_len:%d l3pid:%d l3len:%d\n",
+ skb->len, l3pid, l3len);
+
+ err = mhi_sock_rcv_multicast(skb, l3pid, l3len);
+
+ return err;
+}
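+
+/*
+ * Receive path summary: the MHI net device driver (via l3mhi.c) sets
+ * skb->protocol to ETH_P_MHI, the packet_type hook registered below
+ * dispatches the frame to mhi_skb_recv(), which reads the L2MUX header
+ * and multicasts the frame to every bound AF_MHI socket whose l3proto
+ * and ifindex match (see mhi_sock_rcv_multicast() in mhi_socket.c).
+ */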
+
+
+static struct packet_type mhi_packet_type __read_mostly = {
+ .type = cpu_to_be16(ETH_P_MHI),
+ .func = mhi_skb_recv,
+};
+
+
+static int __init mhi_proto_init(void)
+{
+ int err;
+
+ DPRINTK("mhi_proto_init\n");
+
+ err = mhi_sock_init();
+ if (err) {
+ printk(KERN_ALERT "MHI socket layer registration failed\n");
+ goto err0;
+ }
+
+ err = mhi_dgram_proto_init();
+ if (err) {
+ printk(KERN_ALERT "MHI DGRAM protocol layer reg failed\n");
+ goto err1;
+ }
+
+ err = mhi_raw_proto_init();
+ if (err) {
+ printk(KERN_ALERT "MHI RAW protocol layer reg failed\n");
+ goto err2;
+ }
+
+ dev_add_pack(&mhi_packet_type);
+
+ return 0;
+
+err2:
+ mhi_dgram_proto_exit();
+err1:
+ mhi_sock_exit();
+err0:
+ return err;
+}
+
+static void __exit mhi_proto_exit(void)
+{
+ DPRINTK("mhi_proto_exit\n");
+
+ dev_remove_pack(&mhi_packet_type);
+
+ mhi_dgram_proto_exit();
+ mhi_raw_proto_exit();
+ mhi_sock_exit();
+}
+
+module_init(mhi_proto_init);
+module_exit(mhi_proto_exit);
+
+MODULE_ALIAS_NETPROTO(PF_MHI);
+
+MODULE_AUTHOR("Petri Mattila <petri.to.mattila@renesasmobile.com>");
+MODULE_DESCRIPTION("MHI Protocol Family for Linux");
+MODULE_LICENSE("GPL");
+
diff --git a/net/mhi/mhi_raw.c b/net/mhi/mhi_raw.c
new file mode 100644
index 000000000000..e15216727e49
--- /dev/null
+++ b/net/mhi/mhi_raw.c
@@ -0,0 +1,326 @@
+/*
+ * File: mhi_raw.c
+ *
+ * Copyright (C) 2011 Renesas Mobile Corporation. All rights reserved.
+ *
+ * Author: Petri Mattila <petri.to.mattila@renesasmobile.com>
+ *
+ * RAW socket implementation for MHI protocol family.
+ *
+ * It uses the MHI socket framework in mhi_socket.c
+ *
+ * This implementation is the most basic frame passing interface.
+ * The user space can use the sendmsg() and recvmsg() system calls
+ * to access the frames. The socket is created with the socket()
+ * system call, e.g. socket(PF_MHI,SOCK_RAW,l2proto).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
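+
+/*
+ * Unlike the DGRAM flavour in mhi_dgram.c, SOCK_RAW passes frames
+ * through untouched: sendmsg() does not prepend an L2MUX header and
+ * recvmsg() does not strip one, so the caller handles complete L2MUX
+ * frames. A user-space sketch, under the same assumptions as the one
+ * in mhi_dgram.c:
+ *
+ *   int fd = socket(PF_MHI, SOCK_RAW, MHI_L3_ANY);
+ *
+ *   bind to an interface via sockaddr_mhi.sa_ifindex, then use
+ *   send()/recv() with whole L2MUX frames.
+ */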
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/mhi.h>
+#include <linux/l2mux.h>
+
+#include <asm/ioctls.h>
+
+#include <net/af_mhi.h>
+#include <net/mhi/sock.h>
+#include <net/mhi/raw.h>
+
+#ifdef CONFIG_MHI_DEBUG
+# define DPRINTK(...) printk(KERN_DEBUG "MHI/RAW: " __VA_ARGS__)
+#else
+# define DPRINTK(...)
+#endif
+
+
+/*** Prototypes ***/
+
+static struct proto mhi_raw_proto;
+
+static void mhi_raw_destruct(struct sock *sk);
+
+
+/*** Functions ***/
+
+int mhi_raw_sock_create(
+ struct net *net,
+ struct socket *sock,
+ int proto,
+ int kern)
+{
+ struct sock *sk;
+ struct mhi_sock *msk;
+
+ DPRINTK("mhi_raw_sock_create: proto:%d type:%d\n",
+ proto, sock->type);
+
+ if (sock->type != SOCK_RAW)
+ return -EPROTONOSUPPORT;
+
+ sk = sk_alloc(net, PF_MHI, GFP_KERNEL, &mhi_raw_proto);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
+
+ sock->ops = &mhi_socket_ops;
+ sock->state = SS_UNCONNECTED;
+
+ if (proto != MHI_L3_ANY)
+ sk->sk_protocol = proto;
+ else
+ sk->sk_protocol = 0;
+
+ sk->sk_destruct = mhi_raw_destruct;
+ sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+
+ sk->sk_prot->init(sk);
+
+ msk = mhi_sk(sk);
+
+ msk->sk_l3proto = proto;
+ msk->sk_ifindex = -1;
+
+ return 0;
+}
+
+static int mhi_raw_init(struct sock *sk)
+{
+ return 0;
+}
+
+static void mhi_raw_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
+static void mhi_raw_close(struct sock *sk, long timeout)
+{
+ sk_common_release(sk);
+}
+
+static int mhi_raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+ int err;
+
+ DPRINTK("mhi_raw_ioctl: cmd:%d arg:%lu\n", cmd, arg);
+
+ switch (cmd) {
+ case SIOCOUTQ:
+ {
+ int len;
+ len = sk_wmem_alloc_get(sk);
+ err = put_user(len, (int __user *)arg);
+ }
+ break;
+
+ case SIOCINQ:
+ {
+ struct sk_buff *skb;
+ int len;
+
+ lock_sock(sk);
+ {
+ skb = skb_peek(&sk->sk_receive_queue);
+ len = skb ? skb->len : 0;
+ }
+ release_sock(sk);
+
+ err = put_user(len, (int __user *)arg);
+ }
+ break;
+
+ default:
+ err = -ENOIOCTLCMD;
+ }
+
+ return err;
+}
+
+static int mhi_raw_sendmsg(
+ struct kiocb *iocb,
+ struct sock *sk,
+ struct msghdr *msg,
+ size_t len)
+{
+ struct mhi_sock *msk = mhi_sk(sk);
+ struct net_device *dev = NULL;
+ struct sk_buff *skb;
+
+ int err = -EFAULT;
+
+ if (msg->msg_flags &
+ ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_CMSG_COMPAT)) {
+ printk(KERN_WARNING
+ "mhi_raw_sendmsg: incompatible socket msg_flags: 0x%08X\n",
+ msg->msg_flags);
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ skb = sock_alloc_send_skb(sk,
+ len,
+ (msg->msg_flags & MSG_DONTWAIT),
+ &err);
+ if (!skb) {
+ printk(KERN_ERR
+ "mhi_raw_sendmsg: sock_alloc_send_skb failed: %d\n",
+ err);
+ goto out;
+ }
+
+ err = memcpy_fromiovec((void *)skb_put(skb, len), msg->msg_iov, len);
+ if (err < 0) {
+ printk(KERN_ERR
+ "mhi_raw_sendmsg: memcpy_fromiovec failed: %d\n",
+ err);
+ goto drop;
+ }
+
+ if (msk->sk_ifindex)
+ dev = dev_get_by_index(sock_net(sk), msk->sk_ifindex);
+
+ if (!dev) {
+ printk(KERN_ERR
+ "mhi_raw_sendmsg: no device for ifindex:%d\n",
+ msk->sk_ifindex);
+ goto drop;
+ }
+
+ if (!(dev->flags & IFF_UP)) {
+ printk(KERN_ERR
+ "mhi_raw_sendmsg: device %d not IFF_UP\n",
+ msk->sk_ifindex);
+ err = -ENETDOWN;
+ goto drop;
+ }
+
+ if (len > dev->mtu) {
+ err = -EMSGSIZE;
+ goto drop;
+ }
+
+ skb_reset_network_header(skb);
+ skb_reset_mac_header(skb);
+
+ err = mhi_skb_send(skb, dev, sk->sk_protocol);
+
+ goto put;
+
+drop:
+ kfree_skb(skb);
+put:
+ if (dev)
+ dev_put(dev);
+out:
+ return err;
+}
+
+static int mhi_raw_recvmsg(
+ struct kiocb *iocb,
+ struct sock *sk,
+ struct msghdr *msg,
+ size_t len,
+ int noblock,
+ int flags,
+ int *addr_len)
+{
+ struct sk_buff *skb = NULL;
+ int cnt, err;
+
+ err = -EOPNOTSUPP;
+
+ if (flags &
+ ~(MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|
+ MSG_NOSIGNAL|MSG_CMSG_COMPAT)) {
+ printk(KERN_WARNING
+ "mhi_raw_recvmsg: incompatible socket flags: 0x%08X",
+ flags);
+ goto out2;
+ }
+
+ if (addr_len)
+ addr_len[0] = 0;
+
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ goto out2;
+
+ cnt = skb->len;
+ if (len < cnt) {
+ msg->msg_flags |= MSG_TRUNC;
+ cnt = len;
+ }
+
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, cnt);
+ if (err)
+ goto out;
+
+ if (flags & MSG_TRUNC)
+ err = skb->len;
+ else
+ err = cnt;
+
+out:
+ skb_free_datagram(sk, skb);
+out2:
+ return err;
+}
+
+static int mhi_raw_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ return NET_RX_SUCCESS;
+}
+
+
+static struct proto mhi_raw_proto = {
+ .name = "MHI-RAW",
+ .owner = THIS_MODULE,
+ .close = mhi_raw_close,
+ .ioctl = mhi_raw_ioctl,
+ .init = mhi_raw_init,
+ .sendmsg = mhi_raw_sendmsg,
+ .recvmsg = mhi_raw_recvmsg,
+ .backlog_rcv = mhi_raw_backlog_rcv,
+ .hash = mhi_sock_hash,
+ .unhash = mhi_sock_unhash,
+ .obj_size = sizeof(struct mhi_sock),
+};
+
+
+int mhi_raw_proto_init(void)
+{
+ DPRINTK("mhi_raw_proto_init\n");
+
+ return proto_register(&mhi_raw_proto, 1);
+}
+
+void mhi_raw_proto_exit(void)
+{
+ DPRINTK("mhi_raw_proto_exit\n");
+
+ proto_unregister(&mhi_raw_proto);
+}
+
diff --git a/net/mhi/mhi_socket.c b/net/mhi/mhi_socket.c
new file mode 100644
index 000000000000..232d67381eea
--- /dev/null
+++ b/net/mhi/mhi_socket.c
@@ -0,0 +1,310 @@
+/*
+ * File: mhi_socket.c
+ *
+ * Copyright (C) 2011 Renesas Mobile Corporation. All rights reserved.
+ *
+ * Author: Petri Mattila <petri.to.mattila@renesasmobile.com>
+ *
+ * Socket layer implementation for AF_MHI.
+ *
+ * This module implements the generic socket layer for MHI.
+ * The per-type protocols are implemented separately, e.g. in
+ * mhi_dgram.c.
+ *
+ * As MHI does not have addresses, the MHI interface is
+ * identified by the sa_ifindex field in sockaddr_mhi.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/mhi.h>
+#include <linux/l2mux.h>
+
+#include <net/tcp_states.h>
+#include <net/af_mhi.h>
+#include <net/mhi/sock.h>
+#include <net/mhi/dgram.h>
+#include <net/mhi/raw.h>
+
+#ifdef CONFIG_MHI_DEBUG
+# define DPRINTK(...) printk(KERN_DEBUG "MHI/SOCKET: " __VA_ARGS__)
+#else
+# define DPRINTK(...)
+#endif
+
+
+/* Master lock for MHI sockets */
+static DEFINE_SPINLOCK(mhi_sock_lock);
+
+/* List of MHI sockets */
+static struct hlist_head mhi_sock_list;
+
+
+static int mhi_sock_create(
+ struct net *net,
+ struct socket *sock,
+ int proto,
+ int kern)
+{
+ int err = 0;
+
+ DPRINTK("mhi_sock_create: type:%d proto:%d\n",
+ sock->type, proto);
+
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_NET_ADMIN)) {
+ printk(KERN_WARNING "AF_MHI: socket create failed: PERMISSION DENIED\n");
+ return -EPERM;
+ }
+
+ if (!mhi_protocol_registered(proto)) {
+ printk(KERN_WARNING "AF_MHI: socket create failed: No support for L2 channel %d\n",
+ proto);
+ return -EPROTONOSUPPORT;
+ }
+
+ if (sock->type == SOCK_DGRAM)
+ err = mhi_dgram_sock_create(net, sock, proto, kern);
+ else if (sock->type == SOCK_RAW)
+ err = mhi_raw_sock_create(net, sock, proto, kern);
+ else {
+ printk(KERN_WARNING "AF_MHI: trying to create a socket with unknown type %d\n",
+ sock->type);
+ err = -EPROTONOSUPPORT;
+ }
+
+ if (err)
+ printk(KERN_WARNING "AF_MHI: socket create failed: %d\n", err);
+
+ return err;
+}
+
+
+static int mhi_sock_release(struct socket *sock)
+{
+ if (sock->sk) {
+ DPRINTK("mhi_sock_release: proto:%d type:%d\n",
+ sock->sk->sk_protocol, sock->type);
+
+ sock->sk->sk_prot->close(sock->sk, 0);
+ sock->sk = NULL;
+ }
+
+ return 0;
+}
+
+static int mhi_sock_bind(
+ struct socket *sock,
+ struct sockaddr *addr,
+ int len)
+{
+ struct sock *sk = sock->sk;
+ struct mhi_sock *msk = mhi_sk(sk);
+ struct sockaddr_mhi *sam = sa_mhi(addr);
+
+ int err = 0;
+
+ DPRINTK("mhi_sock_bind: proto:%d state:%d\n",
+ sk->sk_protocol, sk->sk_state);
+
+ if (sk->sk_prot->bind)
+ return sk->sk_prot->bind(sk, addr, len);
+
+ if (len < sizeof(struct sockaddr_mhi))
+ return -EINVAL;
+
+ lock_sock(sk);
+ {
+ if (sk->sk_state == TCP_CLOSE) {
+ msk->sk_ifindex = sam->sa_ifindex;
+ WARN_ON(sk_hashed(sk));
+ sk->sk_prot->hash(sk);
+ } else {
+ err = -EINVAL; /* attempt to rebind */
+ }
+ }
+ release_sock(sk);
+
+ return err;
+}
+
+int mhi_sock_rcv_unicast(
+ struct sk_buff *skb,
+ u8 l3proto,
+ u32 l3length)
+{
+ struct sock *sknode;
+ struct mhi_sock *msk;
+
+ DPRINTK("%s: proto:%d, len:%d\n", l3proto, l3length, __func__);
+
+ spin_lock(&mhi_sock_lock);
+ {
+ sk_for_each(sknode, &mhi_sock_list) {
+ msk = mhi_sk(sknode);
+ if ((msk->sk_l3proto == MHI_L3_ANY ||
+ msk->sk_l3proto == l3proto) &&
+ (msk->sk_ifindex == skb->dev->ifindex)) {
+ sock_hold(sknode);
+ sk_receive_skb(sknode, skb, 0);
+ skb = NULL;
+ break;
+ }
+ }
+ }
+ spin_unlock(&mhi_sock_lock);
+
+ if (skb)
+ kfree_skb(skb);
+
+ return NET_RX_SUCCESS;
+}
+
+int mhi_sock_rcv_multicast(
+ struct sk_buff *skb,
+ u8 l3proto,
+ u32 l3length)
+{
+ struct sock *sknode;
+ struct mhi_sock *msk;
+ struct sk_buff *clone;
+
+ DPRINTK("%s: proto:%d, len:%d\n", l3proto, l3length, __func__);
+
+ spin_lock(&mhi_sock_lock);
+ {
+ sk_for_each(sknode, &mhi_sock_list) {
+ msk = mhi_sk(sknode);
+ if ((msk->sk_l3proto == MHI_L3_ANY ||
+ msk->sk_l3proto == l3proto) &&
+ (msk->sk_ifindex == skb->dev->ifindex)) {
+ clone = skb_clone(skb, GFP_ATOMIC);
+ if (likely(clone)) {
+ sock_hold(sknode);
+ sk_receive_skb(sknode, clone, 0);
+ }
+ }
+ }
+ }
+ spin_unlock(&mhi_sock_lock);
+
+ kfree_skb(skb);
+
+ return NET_RX_SUCCESS;
+}
+
+int mhi_sock_sendmsg(
+ struct kiocb *iocb,
+ struct socket *sock,
+ struct msghdr *msg,
+ size_t len)
+{
+ DPRINTK("mhi_sock_sendmsg: len:%lu\n", len);
+ return sock->sk->sk_prot->sendmsg(iocb, sock->sk, msg, len);
+}
+
+int mhi_sock_recvmsg(
+ struct kiocb *iocb,
+ struct socket *sock,
+ struct msghdr *msg,
+ size_t len,
+ int flags)
+{
+ int addrlen = 0;
+ int err;
+
+ err = sock->sk->sk_prot->recvmsg(iocb, sock->sk, msg, len,
+ flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT,
+ &addrlen);
+
+ if (err >= 0)
+ msg->msg_namelen = addrlen;
+
+ return err;
+}
+
+
+void mhi_sock_hash(struct sock *sk)
+{
+ DPRINTK("mhi_sock_hash: proto:%d\n", sk->sk_protocol);
+
+ spin_lock_bh(&mhi_sock_lock);
+ sk_add_node(sk, &mhi_sock_list);
+ spin_unlock_bh(&mhi_sock_lock);
+}
+
+void mhi_sock_unhash(struct sock *sk)
+{
+ DPRINTK("mhi_sock_unhash: proto:%d\n", sk->sk_protocol);
+
+ spin_lock_bh(&mhi_sock_lock);
+ sk_del_node_init(sk);
+ spin_unlock_bh(&mhi_sock_lock);
+}
+
+
+const struct proto_ops mhi_socket_ops = {
+ .family = AF_MHI,
+ .owner = THIS_MODULE,
+ .release = mhi_sock_release,
+ .bind = mhi_sock_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = datagram_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = sock_no_setsockopt,
+ .compat_getsockopt = sock_no_getsockopt,
+#endif
+ .sendmsg = mhi_sock_sendmsg,
+ .recvmsg = mhi_sock_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static const struct net_proto_family mhi_proto_family = {
+ .family = PF_MHI,
+ .create = mhi_sock_create,
+ .owner = THIS_MODULE,
+};
+
+
+int mhi_sock_init(void)
+{
+ DPRINTK("mhi_sock_init\n");
+
+ INIT_HLIST_HEAD(&mhi_sock_list);
+ spin_lock_init(&mhi_sock_lock);
+
+ return sock_register(&mhi_proto_family);
+}
+
+void mhi_sock_exit(void)
+{
+ DPRINTK("mhi_sock_exit\n");
+
+ sock_unregister(PF_MHI);
+}
+
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56d22cae5906..bdf9c60b296a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1089,6 +1089,8 @@ config NETFILTER_XT_MATCH_OWNER
based on who created the socket: the user or group. It is also
possible to check whether a socket actually exists.
+ Conflicts with the '"quota, tag, owner" match'.
+
config NETFILTER_XT_MATCH_POLICY
tristate 'IPsec "policy" match support'
depends on XFRM
@@ -1122,6 +1124,22 @@ config NETFILTER_XT_MATCH_PKTTYPE
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_MATCH_QTAGUID
+ bool '"quota, tag, owner" match and stats support'
+ depends on NETFILTER_XT_MATCH_SOCKET
+ depends on NETFILTER_XT_MATCH_OWNER=n
+ help
+ This option replaces the `owner' match. In addition to matching
+ on uid, it keeps stats based on a tag assigned to a socket.
+ The full tag is comprised of a UID and an accounting tag.
+ The tags are assignable to sockets from user space (e.g. a download
+ manager can assign the socket to another UID for accounting).
+ Stats and control are done via /proc/net/xt_qtaguid/.
+ It replaces `owner' as it takes the same arguments, but it
+ must still be recognized by the iptables tool.
+
+ If unsure, say `N'.
+
config NETFILTER_XT_MATCH_QUOTA
tristate '"quota" match support'
depends on NETFILTER_ADVANCED
@@ -1132,6 +1150,30 @@ config NETFILTER_XT_MATCH_QUOTA
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+config NETFILTER_XT_MATCH_QUOTA2
+ tristate '"quota2" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `quota2' match, which matches on a byte
+ counter that is kept globally rather than per CPU, so the
+ quota is counted correctly. Quotas can also be named.
+ This is based on http://xtables-addons.git.sourceforge.net
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_QUOTA2_LOG
+ bool '"quota2" Netfilter LOG support'
+ depends on NETFILTER_XT_MATCH_QUOTA2
+ depends on IP_NF_TARGET_ULOG=n # not yes, not module, just no
+ default n
+ help
+ This option allows `quota2' to log ONCE when a quota limit
+ is passed. It logs via NETLINK using the NETLINK_NFLOG family.
+ It logs similarly to how ipt_ULOG would without data.
+
+ If unsure, say `N'.
+
config NETFILTER_XT_MATCH_RATEEST
tristate '"rateest" match support'
depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a1abf87d43bf..d9655f6f3466 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -127,7 +127,9 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o
obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o
obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index f407ebc13481..f6562ba97a97 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -5,6 +5,7 @@
* After timer expires a kevent will be sent.
*
* Copyright (C) 2004, 2010 Nokia Corporation
+ *
* Written by Timo Teras <ext-timo.teras@nokia.com>
*
* Converted to x_tables and reworked for upstream inclusion
@@ -38,8 +39,15 @@
#include <linux/netfilter/xt_IDLETIMER.h>
#include <linux/kdev_t.h>
#include <linux/kobject.h>
+#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <linux/sysfs.h>
+#include <linux/rtc.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/suspend.h>
+#include <linux/notifier.h>
+#include <net/net_namespace.h>
struct idletimer_tg_attr {
struct attribute attr;
@@ -55,14 +63,97 @@ struct idletimer_tg {
struct kobject *kobj;
struct idletimer_tg_attr attr;
+ struct timespec delayed_timer_trigger;
+ struct timespec last_modified_timer;
+ struct timespec last_suspend_time;
+ struct notifier_block pm_nb;
+
+ int timeout;
unsigned int refcnt;
+ bool work_pending;
+ bool send_nl_msg;
+ bool active;
};
static LIST_HEAD(idletimer_tg_list);
static DEFINE_MUTEX(list_mutex);
+static DEFINE_SPINLOCK(timestamp_lock);
static struct kobject *idletimer_tg_kobj;
+static bool check_for_delayed_trigger(struct idletimer_tg *timer,
+ struct timespec *ts)
+{
+ bool state;
+ struct timespec temp;
+ spin_lock_bh(&timestamp_lock);
+ timer->work_pending = false;
+ if ((ts->tv_sec - timer->last_modified_timer.tv_sec) > timer->timeout ||
+ timer->delayed_timer_trigger.tv_sec != 0) {
+ state = false;
+ temp.tv_sec = timer->timeout;
+ temp.tv_nsec = 0;
+ if (timer->delayed_timer_trigger.tv_sec != 0) {
+ temp = timespec_add(timer->delayed_timer_trigger, temp);
+ ts->tv_sec = temp.tv_sec;
+ ts->tv_nsec = temp.tv_nsec;
+ timer->delayed_timer_trigger.tv_sec = 0;
+ timer->work_pending = true;
+ schedule_work(&timer->work);
+ } else {
+ temp = timespec_add(timer->last_modified_timer, temp);
+ ts->tv_sec = temp.tv_sec;
+ ts->tv_nsec = temp.tv_nsec;
+ }
+ } else {
+ state = timer->active;
+ }
+ spin_unlock_bh(&timestamp_lock);
+ return state;
+}
+
+static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer)
+{
+ char iface_msg[NLMSG_MAX_SIZE];
+ char state_msg[NLMSG_MAX_SIZE];
+ char timestamp_msg[NLMSG_MAX_SIZE];
+ char *envp[] = { iface_msg, state_msg, timestamp_msg, NULL };
+ int res;
+ struct timespec ts;
+ uint64_t time_ns;
+ bool state;
+
+ res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s",
+ iface);
+ if (NLMSG_MAX_SIZE <= res) {
+ pr_err("message too long (%d)", res);
+ return;
+ }
+
+ get_monotonic_boottime(&ts);
+ state = check_for_delayed_trigger(timer, &ts);
+ res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s",
+ state ? "active" : "inactive");
+
+ if (NLMSG_MAX_SIZE <= res) {
+ pr_err("message too long (%d)", res);
+ return;
+ }
+
+ time_ns = timespec_to_ns(&ts);
+ res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns);
+ if (NLMSG_MAX_SIZE <= res) {
+ timestamp_msg[0] = '\0';
+ pr_err("message too long (%d)", res);
+ }
+
+ pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg);
+ kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp);
+}
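+
+/*
+ * The resulting uevent carries three environment variables, e.g.
+ * (values illustrative):
+ *
+ *   INTERFACE=rmnet0
+ *   STATE=inactive
+ *   TIME_NS=123456789012345
+ */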
+
static
struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
{
@@ -83,6 +174,7 @@ static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
{
struct idletimer_tg *timer;
unsigned long expires = 0;
+ unsigned long now = jiffies;
mutex_lock(&list_mutex);
@@ -92,11 +184,15 @@ static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
mutex_unlock(&list_mutex);
- if (time_after(expires, jiffies))
+ if (time_after(expires, now))
return sprintf(buf, "%u\n",
- jiffies_to_msecs(expires - jiffies) / 1000);
+ jiffies_to_msecs(expires - now) / 1000);
- return sprintf(buf, "0\n");
+ if (timer->send_nl_msg)
+ return sprintf(buf, "0 %d\n",
+ jiffies_to_msecs(now - expires) / 1000);
+ else
+ return sprintf(buf, "0\n");
}
static void idletimer_tg_work(struct work_struct *work)
@@ -105,6 +201,9 @@ static void idletimer_tg_work(struct work_struct *work)
work);
sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
+
+ if (timer->send_nl_msg)
+ notify_netlink_uevent(timer->attr.attr.name, timer);
}
static void idletimer_tg_expired(unsigned long data)
@@ -112,8 +211,55 @@ static void idletimer_tg_expired(unsigned long data)
struct idletimer_tg *timer = (struct idletimer_tg *) data;
pr_debug("timer %s expired\n", timer->attr.attr.name);
-
+ spin_lock_bh(&timestamp_lock);
+ timer->active = false;
+ timer->work_pending = true;
schedule_work(&timer->work);
+ spin_unlock_bh(&timestamp_lock);
+}
+
+static int idletimer_resume(struct notifier_block *notifier,
+ unsigned long pm_event, void *unused)
+{
+ struct timespec ts;
+ unsigned long time_diff, now = jiffies;
+ struct idletimer_tg *timer = container_of(notifier,
+ struct idletimer_tg, pm_nb);
+ if (!timer)
+ return NOTIFY_DONE;
+ switch (pm_event) {
+ case PM_SUSPEND_PREPARE:
+ get_monotonic_boottime(&timer->last_suspend_time);
+ break;
+ case PM_POST_SUSPEND:
+ spin_lock_bh(&timestamp_lock);
+ if (!timer->active) {
+ spin_unlock_bh(&timestamp_lock);
+ break;
+ }
+ /* jiffies are not updated while suspended, so `now' still
+ * reflects the time at which the system suspended */
+ if (time_after(timer->timer.expires, now)) {
+ get_monotonic_boottime(&ts);
+ ts = timespec_sub(ts, timer->last_suspend_time);
+ time_diff = timespec_to_jiffies(&ts);
+ if (timer->timer.expires > (time_diff + now)) {
+ mod_timer_pending(&timer->timer,
+ (timer->timer.expires - time_diff));
+ } else {
+ del_timer(&timer->timer);
+ timer->timer.expires = 0;
+ timer->active = false;
+ timer->work_pending = true;
+ schedule_work(&timer->work);
+ }
+ }
+ spin_unlock_bh(&timestamp_lock);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
}
static int idletimer_tg_create(struct idletimer_tg_info *info)
@@ -145,6 +291,20 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
setup_timer(&info->timer->timer, idletimer_tg_expired,
(unsigned long) info->timer);
info->timer->refcnt = 1;
+ info->timer->send_nl_msg = (info->send_nl_msg == 0) ? false : true;
+ info->timer->active = true;
+ info->timer->timeout = info->timeout;
+
+ info->timer->delayed_timer_trigger.tv_sec = 0;
+ info->timer->delayed_timer_trigger.tv_nsec = 0;
+ info->timer->work_pending = false;
+ get_monotonic_boottime(&info->timer->last_modified_timer);
+
+ info->timer->pm_nb.notifier_call = idletimer_resume;
+ ret = register_pm_notifier(&info->timer->pm_nb);
+ if (ret)
+ printk(KERN_WARNING "[%s] Failed to register pm notifier %d\n",
+ __func__, ret);
mod_timer(&info->timer->timer,
msecs_to_jiffies(info->timeout * 1000) + jiffies);
@@ -161,6 +321,34 @@ out:
return ret;
}
+static void reset_timer(const struct idletimer_tg_info *info)
+{
+ unsigned long now = jiffies;
+ struct idletimer_tg *timer = info->timer;
+ bool timer_prev;
+
+ spin_lock_bh(&timestamp_lock);
+ timer_prev = timer->active;
+ timer->active = true;
+ /* timer_prev guards against jiffies wraparound in time_before() */
+ if (!timer_prev || time_before(timer->timer.expires, now)) {
+ pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n",
+ timer->timer.expires, now);
+ /* check whether an inactive notification is still pending */
+ if (timer->work_pending)
+ timer->delayed_timer_trigger = timer->last_modified_timer;
+ else {
+ timer->work_pending = true;
+ schedule_work(&timer->work);
+ }
+ }
+
+ get_monotonic_boottime(&timer->last_modified_timer);
+ mod_timer(&timer->timer,
+ msecs_to_jiffies(info->timeout * 1000) + now);
+ spin_unlock_bh(&timestamp_lock);
+}
+
/*
* The actual xt_tables plugin.
*/
@@ -168,15 +356,23 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb,
const struct xt_action_param *par)
{
const struct idletimer_tg_info *info = par->targinfo;
+ unsigned long now = jiffies;
pr_debug("resetting timer %s, timeout period %u\n",
info->label, info->timeout);
BUG_ON(!info->timer);
- mod_timer(&info->timer->timer,
- msecs_to_jiffies(info->timeout * 1000) + jiffies);
+ info->timer->active = true;
+ if (time_before(info->timer->timer.expires, now)) {
+ schedule_work(&info->timer->work);
+ pr_debug("Starting timer %s (Expired, Jiffies): %lu, %lu\n",
+ info->label, info->timer->timer.expires, now);
+ }
+
+ /* TODO: Avoid modifying timers on each packet */
+ reset_timer(info);
return XT_CONTINUE;
}
@@ -185,7 +381,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
struct idletimer_tg_info *info = par->targinfo;
int ret;
- pr_debug("checkentry targinfo%s\n", info->label);
+ pr_debug("checkentry targinfo %s\n", info->label);
if (info->timeout == 0) {
pr_debug("timeout value is zero\n");
@@ -204,9 +400,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
info->timer = __idletimer_tg_find_by_label(info->label);
if (info->timer) {
info->timer->refcnt++;
- mod_timer(&info->timer->timer,
- msecs_to_jiffies(info->timeout * 1000) + jiffies);
-
+ reset_timer(info);
pr_debug("increased refcnt of timer %s to %u\n",
info->label, info->timer->refcnt);
} else {
@@ -219,6 +413,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
}
mutex_unlock(&list_mutex);
+
return 0;
}
@@ -236,11 +431,12 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
list_del(&info->timer->entry);
del_timer_sync(&info->timer->timer);
sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+ unregister_pm_notifier(&info->timer->pm_nb);
kfree(info->timer->attr.attr.name);
kfree(info->timer);
} else {
pr_debug("decreased refcnt of timer %s to %u\n",
- info->label, info->timer->refcnt);
+ info->label, info->timer->refcnt);
}
mutex_unlock(&list_mutex);
@@ -248,6 +444,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
static struct xt_target idletimer_tg __read_mostly = {
.name = "IDLETIMER",
+ .revision = 1,
.family = NFPROTO_UNSPEC,
.target = idletimer_tg_target,
.targetsize = sizeof(struct idletimer_tg_info),
@@ -313,3 +510,4 @@ MODULE_DESCRIPTION("Xtables: idle time monitor");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("ipt_IDLETIMER");
MODULE_ALIAS("ip6t_IDLETIMER");
+MODULE_ALIAS("arpt_IDLETIMER");
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
new file mode 100644
index 000000000000..eee667987f7d
--- /dev/null
+++ b/net/netfilter/xt_qtaguid.c
@@ -0,0 +1,3012 @@
+/*
+ * Kernel iptables module to track stats for packets based on user tags.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * There are run-time debug flags enabled via the debug_mask module param, or
+ * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
+ */
+#define DEBUG
+
+#include <linux/file.h>
+#include <linux/inetdevice.h>
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_qtaguid.h>
+#include <linux/ratelimit.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+#include <net/addrconf.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#endif
+
+#include <linux/netfilter/xt_socket.h>
+#include "xt_qtaguid_internal.h"
+#include "xt_qtaguid_print.h"
+#include "../../fs/proc/internal.h"
+
+/*
+ * We only use the xt_socket funcs within a similar context to avoid unexpected
+ * return values.
+ */
+#define XT_SOCKET_SUPPORTED_HOOKS \
+ ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
+
+
+static const char *module_procdirname = "xt_qtaguid";
+static struct proc_dir_entry *xt_qtaguid_procdir;
+
+static unsigned int proc_iface_perms = S_IRUGO;
+module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
+
+static struct proc_dir_entry *xt_qtaguid_stats_file;
+static unsigned int proc_stats_perms = S_IRUGO;
+module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
+
+static struct proc_dir_entry *xt_qtaguid_ctrl_file;
+
+/* Everybody can write. But proc_ctrl_write_limited is true by default,
+ * limits what can be controlled. See the can_*() functions.
+ */
+static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
+module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
+
+/* Limited by default, so the gid of the ctrl and stats proc entries
+ * will limit what can be done. See the can_*() functions.
+ */
+static bool proc_stats_readall_limited = true;
+static bool proc_ctrl_write_limited = true;
+
+module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
+ S_IRUGO | S_IWUSR);
+module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
+ S_IRUGO | S_IWUSR);
+
+/*
+ * Limit the number of active tags (via socket tags) for a given UID.
+ * Multiple processes could share the UID.
+ */
+static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
+module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
+
+/*
+ * After the kernel has initialized this module, it is still possible
+ * to make it passive.
+ * Setting passive to Y:
+ * - the iface stats handling will not act on notifications.
+ * - iptables matches will never match.
+ * - ctrl commands silently succeed.
+ * - stats are always empty.
+ * This is mostly useful when a bug is suspected.
+ */
+static bool module_passive;
+module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
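+/*
+ * Illustrative usage (assuming the module is built as xt_qtaguid and
+ * sysfs is mounted at /sys):
+ * echo Y > /sys/module/xt_qtaguid/parameters/passive
+ */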
+
+/*
+ * Control how qtaguid data is tracked per proc/uid.
+ * Setting tag_tracking_passive to Y:
+ * - don't create proc specific structs to track tags
+ * - don't check whether the number of active tags exceeds the limit.
+ * - don't clean up socket tags on process exits.
+ * This is mostly useful when a bug is suspected.
+ */
+static bool qtu_proc_handling_passive;
+module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
+ S_IRUGO | S_IWUSR);
+
+#define QTU_DEV_NAME "xt_qtaguid"
+
+uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
+module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
+
+/*---------------------------------------------------------------------------*/
+static const char *iface_stat_procdirname = "iface_stat";
+static struct proc_dir_entry *iface_stat_procdir;
+/*
+ * The iface_stat_all* will go away once userspace gets used to the new fields
+ * that have a format line.
+ */
+static const char *iface_stat_all_procfilename = "iface_stat_all";
+static struct proc_dir_entry *iface_stat_all_procfile;
+static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
+static struct proc_dir_entry *iface_stat_fmt_procfile;
+
+
+static LIST_HEAD(iface_stat_list);
+static DEFINE_SPINLOCK(iface_stat_list_lock);
+
+static struct rb_root sock_tag_tree = RB_ROOT;
+static DEFINE_SPINLOCK(sock_tag_list_lock);
+
+static struct rb_root tag_counter_set_tree = RB_ROOT;
+static DEFINE_SPINLOCK(tag_counter_set_list_lock);
+
+static struct rb_root uid_tag_data_tree = RB_ROOT;
+static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
+
+static struct rb_root proc_qtu_data_tree = RB_ROOT;
+/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
+
+static struct qtaguid_event_counts qtu_events;
+/*----------------------------------------------*/
+static bool can_manipulate_uids(void)
+{
+ /* root pwnd */
+ return in_egroup_p(xt_qtaguid_ctrl_file->gid)
+ || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited)
+ || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
+}
+
+static bool can_impersonate_uid(uid_t uid)
+{
+ return uid == current_fsuid() || can_manipulate_uids();
+}
+
+static bool can_read_other_uid_stats(uid_t uid)
+{
+ /* root pwnd */
+ return in_egroup_p(xt_qtaguid_stats_file->gid)
+ || unlikely(!current_fsuid()) || uid == current_fsuid()
+ || unlikely(!proc_stats_readall_limited)
+ || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
+}
+
+static inline void dc_add_byte_packets(struct data_counters *counters, int set,
+ enum ifs_tx_rx direction,
+ enum ifs_proto ifs_proto,
+ int bytes,
+ int packets)
+{
+ counters->bpc[set][direction][ifs_proto].bytes += bytes;
+ counters->bpc[set][direction][ifs_proto].packets += packets;
+}
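+/*
+ * Example (illustrative values): one received 1500-byte TCP segment
+ * accounted on set 0 corresponds to
+ * dc_add_byte_packets(counters, 0, IFS_RX, IFS_TCP, 1500, 1);
+ */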
+
+static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct tag_node *data = rb_entry(node, struct tag_node, node);
+ int result;
+ RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
+ " node=%p data=%p\n", tag, node, data);
+ result = tag_compare(tag, data->tag);
+ RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
+ " data.tag=0x%llx (uid=%u) res=%d\n",
+ tag, data->tag, get_uid_from_tag(data->tag), result);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct tag_node *this = rb_entry(*new, struct tag_node,
+ node);
+ int result = tag_compare(data->tag, this->tag);
+ RB_DEBUG("qtaguid: %s(): tag=0x%llx"
+ " (uid=%u)\n", __func__,
+ this->tag,
+ get_uid_from_tag(this->tag));
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
+{
+ tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
+{
+ struct tag_node *node = tag_node_tree_search(root, tag);
+ if (!node)
+ return NULL;
+ return rb_entry(&node->node, struct tag_stat, tn.node);
+}
+
+static void tag_counter_set_tree_insert(struct tag_counter_set *data,
+ struct rb_root *root)
+{
+ tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
+ tag_t tag)
+{
+ struct tag_node *node = tag_node_tree_search(root, tag);
+ if (!node)
+ return NULL;
+ return rb_entry(&node->node, struct tag_counter_set, tn.node);
+
+}
+
+static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
+{
+ tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
+{
+ struct tag_node *node = tag_node_tree_search(root, tag);
+ if (!node)
+ return NULL;
+ return rb_entry(&node->node, struct tag_ref, tn.node);
+}
+
+static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
+ const struct sock *sk)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct sock_tag *data = rb_entry(node, struct sock_tag,
+ sock_node);
+ if (sk < data->sk)
+ node = node->rb_left;
+ else if (sk > data->sk)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct sock_tag *this = rb_entry(*new, struct sock_tag,
+ sock_node);
+ parent = *new;
+ if (data->sk < this->sk)
+ new = &((*new)->rb_left);
+ else if (data->sk > this->sk)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->sock_node, parent, new);
+ rb_insert_color(&data->sock_node, root);
+}
+
+static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
+{
+ struct rb_node *node;
+ struct sock_tag *st_entry;
+
+ node = rb_first(st_to_free_tree);
+ while (node) {
+ st_entry = rb_entry(node, struct sock_tag, sock_node);
+ node = rb_next(node);
+ CT_DEBUG("qtaguid: %s(): "
+ "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
+ st_entry->sk,
+ st_entry->tag,
+ get_uid_from_tag(st_entry->tag));
+ rb_erase(&st_entry->sock_node, st_to_free_tree);
+ sockfd_put(st_entry->socket);
+ kfree(st_entry);
+ }
+}
+
+static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
+ const pid_t pid)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct proc_qtu_data *data = rb_entry(node,
+ struct proc_qtu_data,
+ node);
+ if (pid < data->pid)
+ node = node->rb_left;
+ else if (pid > data->pid)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
+ struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct proc_qtu_data *this = rb_entry(*new,
+ struct proc_qtu_data,
+ node);
+ parent = *new;
+ if (data->pid < this->pid)
+ new = &((*new)->rb_left);
+ else if (data->pid > this->pid)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void uid_tag_data_tree_insert(struct uid_tag_data *data,
+ struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct uid_tag_data *this = rb_entry(*new,
+ struct uid_tag_data,
+ node);
+ parent = *new;
+ if (data->uid < this->uid)
+ new = &((*new)->rb_left);
+ else if (data->uid > this->uid)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
+ uid_t uid)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct uid_tag_data *data = rb_entry(node,
+ struct uid_tag_data,
+ node);
+ if (uid < data->uid)
+ node = node->rb_left;
+ else if (uid > data->uid)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+/*
+ * Looks up the uid_tag_data for the given uid, allocating a new struct
+ * if needed. Never returns NULL: the result is either a valid pointer
+ * or a PTR_ERR on allocation failure. The caller must hold
+ * uid_tag_data_tree_lock.
+ * If found_res is not NULL:
+ * sets *found_res to true if the entry already existed,
+ * sets *found_res to false if it was newly allocated.
+ */
+struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
+{
+ struct uid_tag_data *utd_entry;
+
+ /* Look for top level uid_tag_data for the UID */
+ utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
+ DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
+
+ if (found_res)
+		*found_res = (utd_entry != NULL);
+ if (utd_entry)
+ return utd_entry;
+
+ utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
+ if (!utd_entry) {
+ pr_err("qtaguid: get_uid_data(%u): "
+ "tag data alloc failed\n", uid);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ utd_entry->uid = uid;
+ utd_entry->tag_ref_tree = RB_ROOT;
+ uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
+ DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
+ return utd_entry;
+}
+
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */
+static struct tag_ref *new_tag_ref(tag_t new_tag,
+ struct uid_tag_data *utd_entry)
+{
+ struct tag_ref *tr_entry;
+ int res;
+
+ if (utd_entry->num_active_tags + 1 > max_sock_tags) {
+ pr_info("qtaguid: new_tag_ref(0x%llx): "
+ "tag ref alloc quota exceeded. max=%d\n",
+ new_tag, max_sock_tags);
+ res = -EMFILE;
+ goto err_res;
+
+ }
+
+ tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
+ if (!tr_entry) {
+ pr_err("qtaguid: new_tag_ref(0x%llx): "
+ "tag ref alloc failed\n",
+ new_tag);
+ res = -ENOMEM;
+ goto err_res;
+ }
+ tr_entry->tn.tag = new_tag;
+ /* tr_entry->num_sock_tags handled by caller */
+ utd_entry->num_active_tags++;
+ tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
+ DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
+ " inserted new tag ref %p\n",
+ new_tag, tr_entry);
+ return tr_entry;
+
+err_res:
+ return ERR_PTR(res);
+}
+
+static struct tag_ref *lookup_tag_ref(tag_t full_tag,
+ struct uid_tag_data **utd_res)
+{
+ struct uid_tag_data *utd_entry;
+ struct tag_ref *tr_entry;
+ bool found_utd;
+ uid_t uid = get_uid_from_tag(full_tag);
+
+ DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
+ full_tag, uid);
+
+ utd_entry = get_uid_data(uid, &found_utd);
+ if (IS_ERR_OR_NULL(utd_entry)) {
+ if (utd_res)
+ *utd_res = utd_entry;
+ return NULL;
+ }
+
+ tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
+ if (utd_res)
+ *utd_res = utd_entry;
+ DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
+ full_tag, utd_entry, tr_entry);
+ return tr_entry;
+}
+
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */
+static struct tag_ref *get_tag_ref(tag_t full_tag,
+ struct uid_tag_data **utd_res)
+{
+ struct uid_tag_data *utd_entry;
+ struct tag_ref *tr_entry;
+
+ DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
+ full_tag);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ tr_entry = lookup_tag_ref(full_tag, &utd_entry);
+ BUG_ON(IS_ERR_OR_NULL(utd_entry));
+ if (!tr_entry)
+ tr_entry = new_tag_ref(full_tag, utd_entry);
+
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ if (utd_res)
+ *utd_res = utd_entry;
+ DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
+ full_tag, utd_entry, tr_entry);
+ return tr_entry;
+}
+
+/* Checks and maybe frees the UID Tag Data entry */
+static void put_utd_entry(struct uid_tag_data *utd_entry)
+{
+ /* Are we done with the UID tag data entry? */
+ if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
+ !utd_entry->num_pqd) {
+ DR_DEBUG("qtaguid: %s(): "
+ "erase utd_entry=%p uid=%u "
+ "by pid=%u tgid=%u uid=%u\n", __func__,
+ utd_entry, utd_entry->uid,
+ current->pid, current->tgid, current_fsuid());
+ BUG_ON(utd_entry->num_active_tags);
+ rb_erase(&utd_entry->node, &uid_tag_data_tree);
+ kfree(utd_entry);
+ } else {
+ DR_DEBUG("qtaguid: %s(): "
+ "utd_entry=%p still has %d tags %d proc_qtu_data\n",
+ __func__, utd_entry, utd_entry->num_active_tags,
+ utd_entry->num_pqd);
+ BUG_ON(!(utd_entry->num_active_tags ||
+ utd_entry->num_pqd));
+ }
+}
+
+/*
+ * If no sock_tags are using this tag_ref,
+ * decrements refcount of utd_entry, removes tr_entry
+ * from utd_entry->tag_ref_tree and frees.
+ */
+static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
+ struct uid_tag_data *utd_entry)
+{
+ DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
+ tr_entry, tr_entry->tn.tag,
+ get_uid_from_tag(tr_entry->tn.tag));
+ if (!tr_entry->num_sock_tags) {
+ BUG_ON(!utd_entry->num_active_tags);
+ utd_entry->num_active_tags--;
+ rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
+ DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
+ kfree(tr_entry);
+ }
+}
+
+static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
+{
+ struct rb_node *node;
+ struct tag_ref *tr_entry;
+ tag_t acct_tag;
+
+ DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
+ full_tag, get_uid_from_tag(full_tag));
+ acct_tag = get_atag_from_tag(full_tag);
+ node = rb_first(&utd_entry->tag_ref_tree);
+ while (node) {
+ tr_entry = rb_entry(node, struct tag_ref, tn.node);
+ node = rb_next(node);
+ if (!acct_tag || tr_entry->tn.tag == full_tag)
+ free_tag_ref_from_utd_entry(tr_entry, utd_entry);
+ }
+}
+
+static ssize_t read_proc_u64(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ uint64_t *valuep = PDE_DATA(file_inode(file));
+ char tmp[24];
+ size_t tmp_size;
+
+ tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
+ return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static ssize_t read_proc_bool(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ bool *valuep = PDE_DATA(file_inode(file));
+ char tmp[24];
+ size_t tmp_size;
+
+ tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
+ return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static int get_active_counter_set(tag_t tag)
+{
+ int active_set = 0;
+ struct tag_counter_set *tcs;
+
+ MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
+ " (uid=%u)\n",
+ tag, get_uid_from_tag(tag));
+ /* For now we only handle UID tags for active sets */
+ tag = get_utag_from_tag(tag);
+ spin_lock_bh(&tag_counter_set_list_lock);
+ tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
+ if (tcs)
+ active_set = tcs->active_set;
+ spin_unlock_bh(&tag_counter_set_list_lock);
+ return active_set;
+}
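+/*
+ * Sketch of the intended use of counter sets (an assumption based on
+ * the ctrl interface): userspace switches the active set for a uid so
+ * that, e.g., foreground vs background traffic accumulates in separate
+ * counters->bpc[set][direction][proto] buckets.
+ */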
+
+/*
+ * Find the entry for tracking the specified interface.
+ * Caller must hold iface_stat_list_lock
+ */
+static struct iface_stat *get_iface_entry(const char *ifname)
+{
+ struct iface_stat *iface_entry;
+
+	/* Look up the entry tracking the specified interface */
+ if (ifname == NULL) {
+ pr_info("qtaguid: iface_stat: get() NULL device name\n");
+ return NULL;
+ }
+
+ /* Iterate over interfaces */
+ list_for_each_entry(iface_entry, &iface_stat_list, list) {
+ if (!strcmp(ifname, iface_entry->ifname))
+ goto done;
+ }
+ iface_entry = NULL;
+done:
+ return iface_entry;
+}
+
+/* This is for fmt2 only */
+static void pp_iface_stat_header(struct seq_file *m)
+{
+ seq_puts(m,
+ "ifname "
+ "total_skb_rx_bytes total_skb_rx_packets "
+ "total_skb_tx_bytes total_skb_tx_packets "
+ "rx_tcp_bytes rx_tcp_packets "
+ "rx_udp_bytes rx_udp_packets "
+ "rx_other_bytes rx_other_packets "
+ "tx_tcp_bytes tx_tcp_packets "
+ "tx_udp_bytes tx_udp_packets "
+ "tx_other_bytes tx_other_packets\n"
+ );
+}
+
+static void pp_iface_stat_line(struct seq_file *m,
+ struct iface_stat *iface_entry)
+{
+ struct data_counters *cnts;
+ int cnt_set = 0; /* We only use one set for the device */
+ cnts = &iface_entry->totals_via_skb;
+ seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
+ "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+ iface_entry->ifname,
+ dc_sum_bytes(cnts, cnt_set, IFS_RX),
+ dc_sum_packets(cnts, cnt_set, IFS_RX),
+ dc_sum_bytes(cnts, cnt_set, IFS_TX),
+ dc_sum_packets(cnts, cnt_set, IFS_TX),
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
+}
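+/*
+ * Illustrative fmt2 line (made-up values, matching the header printed
+ * by pp_iface_stat_header() above):
+ * rmnet0 1024 8 2048 16 768 6 256 2 0 0 1536 12 512 4 0 0
+ */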
+
+struct proc_iface_stat_fmt_info {
+ int fmt;
+};
+
+static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
+{
+ struct proc_iface_stat_fmt_info *p = m->private;
+ loff_t n = *pos;
+
+ /*
+ * This lock will prevent iface_stat_update() from changing active,
+ * and in turn prevent an interface from unregistering itself.
+ */
+ spin_lock_bh(&iface_stat_list_lock);
+
+ if (unlikely(module_passive))
+ return NULL;
+
+ if (!n && p->fmt == 2)
+ pp_iface_stat_header(m);
+
+ return seq_list_start(&iface_stat_list, n);
+}
+
+static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &iface_stat_list, pos);
+}
+
+static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
+{
+ spin_unlock_bh(&iface_stat_list_lock);
+}
+
+static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
+{
+ struct proc_iface_stat_fmt_info *p = m->private;
+ struct iface_stat *iface_entry;
+ struct rtnl_link_stats64 dev_stats, *stats;
+ struct rtnl_link_stats64 no_dev_stats = {0};
+
+
+ CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
+ current->pid, current->tgid, current_fsuid());
+
+ iface_entry = list_entry(v, struct iface_stat, list);
+
+ if (iface_entry->active) {
+ stats = dev_get_stats(iface_entry->net_dev,
+ &dev_stats);
+ } else {
+ stats = &no_dev_stats;
+ }
+ /*
+ * If the meaning of the data changes, then update the fmtX
+ * string.
+ */
+ if (p->fmt == 1) {
+ seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
+ iface_entry->ifname,
+ iface_entry->active,
+ iface_entry->totals_via_dev[IFS_RX].bytes,
+ iface_entry->totals_via_dev[IFS_RX].packets,
+ iface_entry->totals_via_dev[IFS_TX].bytes,
+ iface_entry->totals_via_dev[IFS_TX].packets,
+ stats->rx_bytes, stats->rx_packets,
+ stats->tx_bytes, stats->tx_packets
+ );
+ } else {
+ pp_iface_stat_line(m, iface_entry);
+ }
+ return 0;
+}
+
+static const struct file_operations read_u64_fops = {
+ .read = read_proc_u64,
+ .llseek = default_llseek,
+};
+
+static const struct file_operations read_bool_fops = {
+ .read = read_proc_bool,
+ .llseek = default_llseek,
+};
+
+static void iface_create_proc_worker(struct work_struct *work)
+{
+ struct proc_dir_entry *proc_entry;
+ struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
+ iface_work);
+ struct iface_stat *new_iface = isw->iface_entry;
+
+ /* iface_entries are not deleted, so safe to manipulate. */
+ proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
+ if (IS_ERR_OR_NULL(proc_entry)) {
+ pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
+ kfree(isw);
+ return;
+ }
+
+ new_iface->proc_ptr = proc_entry;
+
+ proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
+ &read_u64_fops,
+ &new_iface->totals_via_dev[IFS_TX].bytes);
+ proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
+ &read_u64_fops,
+ &new_iface->totals_via_dev[IFS_RX].bytes);
+ proc_create_data("tx_packets", proc_iface_perms, proc_entry,
+ &read_u64_fops,
+ &new_iface->totals_via_dev[IFS_TX].packets);
+ proc_create_data("rx_packets", proc_iface_perms, proc_entry,
+ &read_u64_fops,
+ &new_iface->totals_via_dev[IFS_RX].packets);
+ proc_create_data("active", proc_iface_perms, proc_entry,
+ &read_bool_fops, &new_iface->active);
+
+ IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
+ "entry=%p dev=%s\n", new_iface, new_iface->ifname);
+ kfree(isw);
+}
+
+/*
+ * Will set the entry's active state, and
+ * update the net_dev accordingly also.
+ */
+static void _iface_stat_set_active(struct iface_stat *entry,
+ struct net_device *net_dev,
+ bool activate)
+{
+ if (activate) {
+ entry->net_dev = net_dev;
+ entry->active = true;
+ IF_DEBUG("qtaguid: %s(%s): "
+ "enable tracking. rfcnt=%d\n", __func__,
+ entry->ifname,
+ __this_cpu_read(*net_dev->pcpu_refcnt));
+ } else {
+ entry->active = false;
+ entry->net_dev = NULL;
+ IF_DEBUG("qtaguid: %s(%s): "
+ "disable tracking. rfcnt=%d\n", __func__,
+ entry->ifname,
+ __this_cpu_read(*net_dev->pcpu_refcnt));
+
+ }
+}
+
+/* Caller must hold iface_stat_list_lock */
+static struct iface_stat *iface_alloc(struct net_device *net_dev)
+{
+ struct iface_stat *new_iface;
+ struct iface_stat_work *isw;
+
+ new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
+ if (new_iface == NULL) {
+ pr_err("qtaguid: iface_stat: create(%s): "
+ "iface_stat alloc failed\n", net_dev->name);
+ return NULL;
+ }
+ new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
+ if (new_iface->ifname == NULL) {
+ pr_err("qtaguid: iface_stat: create(%s): "
+ "ifname alloc failed\n", net_dev->name);
+ kfree(new_iface);
+ return NULL;
+ }
+ spin_lock_init(&new_iface->tag_stat_list_lock);
+ new_iface->tag_stat_tree = RB_ROOT;
+ _iface_stat_set_active(new_iface, net_dev, true);
+
+ /*
+ * ipv6 notifier chains are atomic :(
+ * No create_proc_read_entry() for you!
+ */
+ isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
+ if (!isw) {
+ pr_err("qtaguid: iface_stat: create(%s): "
+ "work alloc failed\n", new_iface->ifname);
+ _iface_stat_set_active(new_iface, net_dev, false);
+ kfree(new_iface->ifname);
+ kfree(new_iface);
+ return NULL;
+ }
+ isw->iface_entry = new_iface;
+ INIT_WORK(&isw->iface_work, iface_create_proc_worker);
+ schedule_work(&isw->iface_work);
+ list_add(&new_iface->list, &iface_stat_list);
+ return new_iface;
+}
+
+static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
+ struct iface_stat *iface)
+{
+ struct rtnl_link_stats64 dev_stats, *stats;
+ bool stats_rewound;
+
+ stats = dev_get_stats(net_dev, &dev_stats);
+	/* Compare bytes, not packets: a byte-count decrease means a reset */
+ stats_rewound =
+ (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
+ || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
+
+ IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
+ "bytes rx/tx=%llu/%llu "
+ "active=%d last_known=%d "
+ "stats_rewound=%d\n", __func__,
+ net_dev ? net_dev->name : "?",
+ iface, net_dev,
+ stats->rx_bytes, stats->tx_bytes,
+ iface->active, iface->last_known_valid, stats_rewound);
+
+ if (iface->active && iface->last_known_valid && stats_rewound) {
+ pr_warn_once("qtaguid: iface_stat: %s(%s): "
+ "iface reset its stats unexpectedly\n", __func__,
+ net_dev->name);
+
+ iface->totals_via_dev[IFS_TX].bytes +=
+ iface->last_known[IFS_TX].bytes;
+ iface->totals_via_dev[IFS_TX].packets +=
+ iface->last_known[IFS_TX].packets;
+ iface->totals_via_dev[IFS_RX].bytes +=
+ iface->last_known[IFS_RX].bytes;
+ iface->totals_via_dev[IFS_RX].packets +=
+ iface->last_known[IFS_RX].packets;
+ iface->last_known_valid = false;
+ IF_DEBUG("qtaguid: %s(%s): iface=%p "
+ "used last known bytes rx/tx=%llu/%llu\n", __func__,
+ iface->ifname, iface, iface->last_known[IFS_RX].bytes,
+ iface->last_known[IFS_TX].bytes);
+ }
+}
+
+/*
+ * Create a new entry for tracking the specified interface.
+ * Do nothing if the entry already exists.
+ * Called when an interface is configured with a valid IP address.
+ */
+static void iface_stat_create(struct net_device *net_dev,
+ struct in_ifaddr *ifa)
+{
+ struct in_device *in_dev = NULL;
+ const char *ifname;
+ struct iface_stat *entry;
+ __be32 ipaddr = 0;
+ struct iface_stat *new_iface;
+
+ IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
+ net_dev ? net_dev->name : "?",
+ ifa, net_dev);
+ if (!net_dev) {
+ pr_err("qtaguid: iface_stat: create(): no net dev\n");
+ return;
+ }
+
+ ifname = net_dev->name;
+ if (!ifa) {
+ in_dev = in_dev_get(net_dev);
+ if (!in_dev) {
+ pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
+ ifname);
+ return;
+ }
+ IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
+ ifname, in_dev);
+ for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+ IF_DEBUG("qtaguid: iface_stat: create(%s): "
+ "ifa=%p ifa_label=%s\n",
+ ifname, ifa,
+ ifa->ifa_label ? ifa->ifa_label : "(null)");
+ if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
+ break;
+ }
+ }
+
+ if (!ifa) {
+ IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
+ ifname);
+ goto done_put;
+ }
+ ipaddr = ifa->ifa_local;
+
+ spin_lock_bh(&iface_stat_list_lock);
+ entry = get_iface_entry(ifname);
+ if (entry != NULL) {
+ IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
+ ifname, entry);
+ iface_check_stats_reset_and_adjust(net_dev, entry);
+ _iface_stat_set_active(entry, net_dev, true);
+ IF_DEBUG("qtaguid: %s(%s): "
+ "tracking now %d on ip=%pI4\n", __func__,
+ entry->ifname, true, &ipaddr);
+ goto done_unlock_put;
+ }
+
+ new_iface = iface_alloc(net_dev);
+ IF_DEBUG("qtaguid: iface_stat: create(%s): done "
+ "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
+done_unlock_put:
+ spin_unlock_bh(&iface_stat_list_lock);
+done_put:
+ if (in_dev)
+ in_dev_put(in_dev);
+}
+
+static void iface_stat_create_ipv6(struct net_device *net_dev,
+ struct inet6_ifaddr *ifa)
+{
+ struct in_device *in_dev;
+ const char *ifname;
+ struct iface_stat *entry;
+ struct iface_stat *new_iface;
+ int addr_type;
+
+ IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
+ ifa, net_dev, net_dev ? net_dev->name : "");
+ if (!net_dev) {
+ pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
+ return;
+ }
+ ifname = net_dev->name;
+
+ in_dev = in_dev_get(net_dev);
+ if (!in_dev) {
+ pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
+ ifname);
+ return;
+ }
+
+ IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
+ ifname, in_dev);
+
+ if (!ifa) {
+ IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
+ ifname);
+ goto done_put;
+ }
+ addr_type = ipv6_addr_type(&ifa->addr);
+
+ spin_lock_bh(&iface_stat_list_lock);
+ entry = get_iface_entry(ifname);
+ if (entry != NULL) {
+ IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+ ifname, entry);
+ iface_check_stats_reset_and_adjust(net_dev, entry);
+ _iface_stat_set_active(entry, net_dev, true);
+ IF_DEBUG("qtaguid: %s(%s): "
+ "tracking now %d on ip=%pI6c\n", __func__,
+ entry->ifname, true, &ifa->addr);
+ goto done_unlock_put;
+ }
+
+ new_iface = iface_alloc(net_dev);
+ IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
+ "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
+
+done_unlock_put:
+ spin_unlock_bh(&iface_stat_list_lock);
+done_put:
+ in_dev_put(in_dev);
+}
+
+static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
+{
+ MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
+ return sock_tag_tree_search(&sock_tag_tree, sk);
+}
+
+static struct sock_tag *get_sock_stat(const struct sock *sk)
+{
+ struct sock_tag *sock_tag_entry;
+ MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
+ if (!sk)
+ return NULL;
+ spin_lock_bh(&sock_tag_list_lock);
+ sock_tag_entry = get_sock_stat_nl(sk);
+ spin_unlock_bh(&sock_tag_list_lock);
+ return sock_tag_entry;
+}
+
+static int ipx_proto(const struct sk_buff *skb,
+ struct xt_action_param *par)
+{
+ int thoff = 0, tproto;
+
+ switch (par->family) {
+ case NFPROTO_IPV6:
+ tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
+ if (tproto < 0)
+ MT_DEBUG("%s(): transport header not found in ipv6"
+ " skb=%p\n", __func__, skb);
+ break;
+ case NFPROTO_IPV4:
+ tproto = ip_hdr(skb)->protocol;
+ break;
+ default:
+ tproto = IPPROTO_RAW;
+ }
+ return tproto;
+}
+
+static void
+data_counters_update(struct data_counters *dc, int set,
+ enum ifs_tx_rx direction, int proto, int bytes)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
+ break;
+ case IPPROTO_UDP:
+ dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
+ break;
+ case IPPROTO_IP:
+ default:
+ dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
+ 1);
+ break;
+ }
+}
+
+/*
+ * Update stats for the specified interface. Do nothing if the entry
+ * does not exist (when a device was never configured with an IP address).
+ * Called when a device is being unregistered.
+ */
+static void iface_stat_update(struct net_device *net_dev, bool stash_only)
+{
+ struct rtnl_link_stats64 dev_stats, *stats;
+ struct iface_stat *entry;
+
+ stats = dev_get_stats(net_dev, &dev_stats);
+ spin_lock_bh(&iface_stat_list_lock);
+ entry = get_iface_entry(net_dev->name);
+ if (entry == NULL) {
+ IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
+ net_dev->name);
+ spin_unlock_bh(&iface_stat_list_lock);
+ return;
+ }
+
+ IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+ net_dev->name, entry);
+ if (!entry->active) {
+ IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
+ net_dev->name);
+ spin_unlock_bh(&iface_stat_list_lock);
+ return;
+ }
+
+ if (stash_only) {
+ entry->last_known[IFS_TX].bytes = stats->tx_bytes;
+ entry->last_known[IFS_TX].packets = stats->tx_packets;
+ entry->last_known[IFS_RX].bytes = stats->rx_bytes;
+ entry->last_known[IFS_RX].packets = stats->rx_packets;
+ entry->last_known_valid = true;
+ IF_DEBUG("qtaguid: %s(%s): "
+ "dev stats stashed rx/tx=%llu/%llu\n", __func__,
+ net_dev->name, stats->rx_bytes, stats->tx_bytes);
+ spin_unlock_bh(&iface_stat_list_lock);
+ return;
+ }
+ entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
+ entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
+ entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
+ entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
+ /* We don't need the last_known[] anymore */
+ entry->last_known_valid = false;
+ _iface_stat_set_active(entry, net_dev, false);
+ IF_DEBUG("qtaguid: %s(%s): "
+ "disable tracking. rx/tx=%llu/%llu\n", __func__,
+ net_dev->name, stats->rx_bytes, stats->tx_bytes);
+ spin_unlock_bh(&iface_stat_list_lock);
+}
+
+/*
+ * Update stats for the specified interface from the skb.
+ * Do nothing if the entry does not exist (when a device was never
+ * configured with an IP address). Called for each skb.
+ */
+static void iface_stat_update_from_skb(const struct sk_buff *skb,
+ struct xt_action_param *par)
+{
+ struct iface_stat *entry;
+ const struct net_device *el_dev;
+ enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
+ int bytes = skb->len;
+ int proto;
+
+ if (!skb->dev) {
+ MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
+ el_dev = par->in ? : par->out;
+ } else {
+ const struct net_device *other_dev;
+ el_dev = skb->dev;
+ other_dev = par->in ? : par->out;
+ if (el_dev != other_dev) {
+ MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
+ "par->(in/out)=%p %s\n",
+ par->hooknum, el_dev, el_dev->name, other_dev,
+ other_dev->name);
+ }
+ }
+
+ if (unlikely(!el_dev)) {
+ pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
+ par->hooknum, __func__);
+ BUG();
+ } else if (unlikely(!el_dev->name)) {
+ pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
+ par->hooknum, __func__);
+ BUG();
+ } else {
+ proto = ipx_proto(skb, par);
+ MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
+ par->hooknum, el_dev->name, el_dev->type,
+ par->family, proto);
+ }
+
+ spin_lock_bh(&iface_stat_list_lock);
+ entry = get_iface_entry(el_dev->name);
+ if (entry == NULL) {
+ IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
+ __func__, el_dev->name);
+ spin_unlock_bh(&iface_stat_list_lock);
+ return;
+ }
+
+ IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+ el_dev->name, entry);
+
+ data_counters_update(&entry->totals_via_skb, 0, direction, proto,
+ bytes);
+ spin_unlock_bh(&iface_stat_list_lock);
+}
+
+static void tag_stat_update(struct tag_stat *tag_entry,
+ enum ifs_tx_rx direction, int proto, int bytes)
+{
+ int active_set;
+ active_set = get_active_counter_set(tag_entry->tn.tag);
+ MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
+ "dir=%d proto=%d bytes=%d)\n",
+ tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
+ active_set, direction, proto, bytes);
+ data_counters_update(&tag_entry->counters, active_set, direction,
+ proto, bytes);
+ if (tag_entry->parent_counters)
+ data_counters_update(tag_entry->parent_counters, active_set,
+ direction, proto, bytes);
+}
+
+/*
+ * Create a new entry for tracking the specified {acct_tag,uid_tag} within
+ * the interface.
+ * iface_entry->tag_stat_list_lock should be held.
+ */
+static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
+ tag_t tag)
+{
+ struct tag_stat *new_tag_stat_entry = NULL;
+ IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
+ " (uid=%u)\n", __func__,
+ iface_entry, tag, get_uid_from_tag(tag));
+ new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
+ if (!new_tag_stat_entry) {
+ pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
+ goto done;
+ }
+ new_tag_stat_entry->tn.tag = tag;
+ tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
+done:
+ return new_tag_stat_entry;
+}
+
+static void if_tag_stat_update(const char *ifname, uid_t uid,
+ const struct sock *sk, enum ifs_tx_rx direction,
+ int proto, int bytes)
+{
+ struct tag_stat *tag_stat_entry;
+ tag_t tag, acct_tag;
+ tag_t uid_tag;
+ struct data_counters *uid_tag_counters;
+ struct sock_tag *sock_tag_entry;
+ struct iface_stat *iface_entry;
+ struct tag_stat *new_tag_stat = NULL;
+ MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
+ "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
+ ifname, uid, sk, direction, proto, bytes);
+
+
+ iface_entry = get_iface_entry(ifname);
+ if (!iface_entry) {
+ pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
+ "%s not found\n", ifname);
+ return;
+ }
+ /* It is ok to process data when an iface_entry is inactive */
+
+ MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
+ ifname, iface_entry);
+
+ /*
+ * Look for a tagged sock.
+ * It will have an acct_uid.
+ */
+ sock_tag_entry = get_sock_stat(sk);
+ if (sock_tag_entry) {
+ tag = sock_tag_entry->tag;
+ acct_tag = get_atag_from_tag(tag);
+ uid_tag = get_utag_from_tag(tag);
+ } else {
+ acct_tag = make_atag_from_value(0);
+ tag = combine_atag_with_uid(acct_tag, uid);
+ uid_tag = make_tag_from_uid(uid);
+ }
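+	/*
+	 * Tag layout sketch (assuming the 32/32 split defined in
+	 * xt_qtaguid_internal.h): the upper 32 bits carry the acct_tag and
+	 * the lower 32 bits the uid, so acct_tag 0x2a with uid 1000 would
+	 * combine to tag 0x0000002a000003e8.
+	 */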
+ MT_DEBUG("qtaguid: iface_stat: stat_update(): "
+ " looking for tag=0x%llx (uid=%u) in ife=%p\n",
+ tag, get_uid_from_tag(tag), iface_entry);
+ /* Loop over tag list under this interface for {acct_tag,uid_tag} */
+ spin_lock_bh(&iface_entry->tag_stat_list_lock);
+
+ tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
+ tag);
+ if (tag_stat_entry) {
+ /*
+ * Updating the {acct_tag, uid_tag} entry handles both stats:
+ * {0, uid_tag} will also get updated.
+ */
+ tag_stat_update(tag_stat_entry, direction, proto, bytes);
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+ return;
+ }
+
+ /* Loop over tag list under this interface for {0,uid_tag} */
+ tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
+ uid_tag);
+ if (!tag_stat_entry) {
+ /* Here: the base uid_tag did not exist */
+ /*
+ * No parent counters. So
+ * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
+ */
+ new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
+ if (!new_tag_stat)
+ goto unlock;
+ uid_tag_counters = &new_tag_stat->counters;
+ } else {
+ uid_tag_counters = &tag_stat_entry->counters;
+ }
+
+ if (acct_tag) {
+ /* Create the child {acct_tag, uid_tag} and hook up parent. */
+ new_tag_stat = create_if_tag_stat(iface_entry, tag);
+ if (!new_tag_stat)
+ goto unlock;
+ new_tag_stat->parent_counters = uid_tag_counters;
+ } else {
+ /*
+ * For new_tag_stat to be still NULL here would require:
+ * {0, uid_tag} exists
+ * and {acct_tag, uid_tag} doesn't exist
+ * AND acct_tag == 0.
+ * Impossible. This reassures us that new_tag_stat
+ * below will always be assigned.
+ */
+ BUG_ON(!new_tag_stat);
+ }
+ tag_stat_update(new_tag_stat, direction, proto, bytes);
+unlock:
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+}
+
+static int iface_netdev_event_handler(struct notifier_block *nb,
+				      unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+
+ if (unlikely(module_passive))
+ return NOTIFY_DONE;
+
+ IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
+ "ev=0x%lx/%s netdev=%p->name=%s\n",
+ event, netdev_evt_str(event), dev, dev ? dev->name : "");
+
+ switch (event) {
+ case NETDEV_UP:
+ iface_stat_create(dev, NULL);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ iface_stat_update(dev, event == NETDEV_DOWN);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static int iface_inet6addr_event_handler(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+ struct net_device *dev;
+
+ if (unlikely(module_passive))
+ return NOTIFY_DONE;
+
+ IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
+ "ev=0x%lx/%s ifa=%p\n",
+ event, netdev_evt_str(event), ifa);
+
+ switch (event) {
+ case NETDEV_UP:
+ BUG_ON(!ifa || !ifa->idev);
+ dev = (struct net_device *)ifa->idev->dev;
+ iface_stat_create_ipv6(dev, ifa);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ BUG_ON(!ifa || !ifa->idev);
+ dev = (struct net_device *)ifa->idev->dev;
+ iface_stat_update(dev, event == NETDEV_DOWN);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static int iface_inetaddr_event_handler(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *dev;
+
+ if (unlikely(module_passive))
+ return NOTIFY_DONE;
+
+ IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
+ "ev=0x%lx/%s ifa=%p\n",
+ event, netdev_evt_str(event), ifa);
+
+ switch (event) {
+ case NETDEV_UP:
+ BUG_ON(!ifa || !ifa->ifa_dev);
+ dev = ifa->ifa_dev->dev;
+ iface_stat_create(dev, ifa);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ BUG_ON(!ifa || !ifa->ifa_dev);
+ dev = ifa->ifa_dev->dev;
+ iface_stat_update(dev, event == NETDEV_DOWN);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block iface_netdev_notifier_blk = {
+ .notifier_call = iface_netdev_event_handler,
+};
+
+static struct notifier_block iface_inetaddr_notifier_blk = {
+ .notifier_call = iface_inetaddr_event_handler,
+};
+
+static struct notifier_block iface_inet6addr_notifier_blk = {
+ .notifier_call = iface_inet6addr_event_handler,
+};
+
+static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
+ .start = iface_stat_fmt_proc_start,
+ .next = iface_stat_fmt_proc_next,
+ .stop = iface_stat_fmt_proc_stop,
+ .show = iface_stat_fmt_proc_show,
+};
+
+static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
+{
+ struct proc_iface_stat_fmt_info *s;
+
+ s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
+ sizeof(struct proc_iface_stat_fmt_info));
+ if (!s)
+ return -ENOMEM;
+
+ s->fmt = (uintptr_t)PDE_DATA(inode);
+ return 0;
+}
+
+static const struct file_operations proc_iface_stat_fmt_fops = {
+ .open = proc_iface_stat_fmt_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
+{
+ int err;
+
+ iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
+ if (!iface_stat_procdir) {
+ pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
+ err = -1;
+ goto err;
+ }
+
+ iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
+ proc_iface_perms,
+ parent_procdir,
+ &proc_iface_stat_fmt_fops,
+ (void *)1 /* fmt1 */);
+ if (!iface_stat_all_procfile) {
+ pr_err("qtaguid: iface_stat: init "
+		       "failed to create stat_old proc entry\n");
+ err = -1;
+ goto err_zap_entry;
+ }
+
+ iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
+ proc_iface_perms,
+ parent_procdir,
+ &proc_iface_stat_fmt_fops,
+ (void *)2 /* fmt2 */);
+ if (!iface_stat_fmt_procfile) {
+ pr_err("qtaguid: iface_stat: init "
+		       "failed to create stat_all proc entry\n");
+ err = -1;
+ goto err_zap_all_stats_entry;
+ }
+
+
+ err = register_netdevice_notifier(&iface_netdev_notifier_blk);
+ if (err) {
+ pr_err("qtaguid: iface_stat: init "
+ "failed to register dev event handler\n");
+ goto err_zap_all_stats_entries;
+ }
+ err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
+ if (err) {
+ pr_err("qtaguid: iface_stat: init "
+ "failed to register ipv4 dev event handler\n");
+ goto err_unreg_nd;
+ }
+
+ err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
+ if (err) {
+ pr_err("qtaguid: iface_stat: init "
+ "failed to register ipv6 dev event handler\n");
+ goto err_unreg_ip4_addr;
+ }
+ return 0;
+
+err_unreg_ip4_addr:
+ unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
+err_unreg_nd:
+ unregister_netdevice_notifier(&iface_netdev_notifier_blk);
+err_zap_all_stats_entries:
+ remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
+err_zap_all_stats_entry:
+ remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
+err_zap_entry:
+ remove_proc_entry(iface_stat_procdirname, parent_procdir);
+err:
+ return err;
+}
+
+static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
+ struct xt_action_param *par)
+{
+ struct sock *sk;
+ unsigned int hook_mask = (1 << par->hooknum);
+
+ MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
+ par->hooknum, par->family);
+
+ /*
+	 * Let's not abuse the xt_socket_get*_sk(), or else it will
+ * return garbage SKs.
+ */
+ if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
+ return NULL;
+
+ switch (par->family) {
+ case NFPROTO_IPV6:
+ sk = xt_socket_get6_sk(skb, par);
+ break;
+ case NFPROTO_IPV4:
+ sk = xt_socket_get4_sk(skb, par);
+ break;
+ default:
+ return NULL;
+ }
+
+ if (sk) {
+ MT_DEBUG("qtaguid: %p->sk_proto=%u "
+ "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
+ /*
+ * When in TCP_TIME_WAIT the sk is not a "struct sock" but
+ * "struct inet_timewait_sock" which is missing fields.
+ */
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ xt_socket_put_sk(sk);
+ sk = NULL;
+ }
+ }
+ return sk;
+}
+
+static void account_for_uid(const struct sk_buff *skb,
+ const struct sock *alternate_sk, uid_t uid,
+ struct xt_action_param *par)
+{
+ const struct net_device *el_dev;
+
+ if (!skb->dev) {
+ MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
+ el_dev = par->in ? : par->out;
+ } else {
+ const struct net_device *other_dev;
+ el_dev = skb->dev;
+ other_dev = par->in ? : par->out;
+ if (el_dev != other_dev) {
+ MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
+ "par->(in/out)=%p %s\n",
+ par->hooknum, el_dev, el_dev->name, other_dev,
+ other_dev->name);
+ }
+ }
+
+ if (unlikely(!el_dev)) {
+ pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
+ } else if (unlikely(!el_dev->name)) {
+ pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
+ } else {
+ int proto = ipx_proto(skb, par);
+ MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
+ par->hooknum, el_dev->name, el_dev->type,
+ par->family, proto);
+
+ if_tag_stat_update(el_dev->name, uid,
+ skb->sk ? skb->sk : alternate_sk,
+ par->in ? IFS_RX : IFS_TX,
+ proto, skb->len);
+ }
+}
+
+static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_qtaguid_match_info *info = par->matchinfo;
+ const struct file *filp;
+ bool got_sock = false;
+ struct sock *sk;
+ uid_t sock_uid;
+ bool res;
+
+ if (unlikely(module_passive))
+ return (info->match ^ info->invert) == 0;
+
+ MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
+ par->hooknum, skb, par->in, par->out, par->family);
+
+ atomic64_inc(&qtu_events.match_calls);
+ if (skb == NULL) {
+ res = (info->match ^ info->invert) == 0;
+ goto ret_res;
+ }
+
+ switch (par->hooknum) {
+ case NF_INET_PRE_ROUTING:
+ case NF_INET_POST_ROUTING:
+ atomic64_inc(&qtu_events.match_calls_prepost);
+ iface_stat_update_from_skb(skb, par);
+ /*
+		 * We are done in pre/post. The skb will get processed
+		 * again later (in another hook).
+ */
+ res = (info->match ^ info->invert);
+ goto ret_res;
+ break;
+	/* default: Fall through and do UID related work */
+ }
+
+ sk = skb->sk;
+ /*
+ * When in TCP_TIME_WAIT the sk is not a "struct sock" but
+ * "struct inet_timewait_sock" which is missing fields.
+ * So we ignore it.
+ */
+ if (sk && sk->sk_state == TCP_TIME_WAIT)
+ sk = NULL;
+ if (sk == NULL) {
+ /*
+		 * A missing skb->sk happens when packets are in-flight
+ * and the matching socket is already closed and gone.
+ */
+ sk = qtaguid_find_sk(skb, par);
+ /*
+ * If we got the socket from the find_sk(), we will need to put
+ * it back, as nf_tproxy_get_sock_v4() got it.
+ */
+		got_sock = (sk != NULL);
+ if (sk)
+ atomic64_inc(&qtu_events.match_found_sk_in_ct);
+ else
+ atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
+ } else {
+ atomic64_inc(&qtu_events.match_found_sk);
+ }
+ MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
+ par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
+ if (sk != NULL) {
+ MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
+ par->hooknum, sk, sk->sk_socket,
+ sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
+ filp = sk->sk_socket ? sk->sk_socket->file : NULL;
+ MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
+ par->hooknum, filp ? filp->f_cred->fsuid : -1);
+ }
+
+ if (sk == NULL || sk->sk_socket == NULL) {
+ /*
+ * Here, the qtaguid_find_sk() using connection tracking
+ * couldn't find the owner, so for now we just count them
+ * against the system.
+ */
+ /*
+ * TODO: unhack how to force just accounting.
+ * For now we only do iface stats when the uid-owner is not
+ * requested.
+ */
+ if (!(info->match & XT_QTAGUID_UID))
+ account_for_uid(skb, sk, 0, par);
+ MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
+ par->hooknum,
+ sk ? sk->sk_socket : NULL);
+ res = (info->match ^ info->invert) == 0;
+ atomic64_inc(&qtu_events.match_no_sk);
+ goto put_sock_ret_res;
+ } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
+ res = false;
+ goto put_sock_ret_res;
+ }
+ filp = sk->sk_socket->file;
+ if (filp == NULL) {
+ MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
+ account_for_uid(skb, sk, 0, par);
+ res = ((info->match ^ info->invert) &
+ (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
+ atomic64_inc(&qtu_events.match_no_sk_file);
+ goto put_sock_ret_res;
+ }
+ sock_uid = filp->f_cred->fsuid;
+ /*
+ * TODO: unhack how to force just accounting.
+ * For now we only do iface stats when the uid-owner is not requested
+ */
+ if (!(info->match & XT_QTAGUID_UID))
+ account_for_uid(skb, sk, sock_uid, par);
+
+ /*
+ * The following two tests fail the match when:
+ * id not in range AND no inverted condition requested
+ * or id in range AND inverted condition requested
+ * Thus (!a && b) || (a && !b) == a ^ b
+ */
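+	/*
+	 * Worked truth table (a = id in range, b = inversion requested):
+	 * a=0 b=0 -> fail; a=1 b=0 -> pass; a=0 b=1 -> pass; a=1 b=1 -> fail
+	 */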
+ if (info->match & XT_QTAGUID_UID)
+ if ((filp->f_cred->fsuid >= info->uid_min &&
+ filp->f_cred->fsuid <= info->uid_max) ^
+ !(info->invert & XT_QTAGUID_UID)) {
+ MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
+ par->hooknum);
+ res = false;
+ goto put_sock_ret_res;
+ }
+ if (info->match & XT_QTAGUID_GID)
+ if ((filp->f_cred->fsgid >= info->gid_min &&
+ filp->f_cred->fsgid <= info->gid_max) ^
+ !(info->invert & XT_QTAGUID_GID)) {
+ MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
+ par->hooknum);
+ res = false;
+ goto put_sock_ret_res;
+ }
+
+ MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
+ res = true;
+
+put_sock_ret_res:
+ if (got_sock)
+ xt_socket_put_sk(sk);
+ret_res:
+ MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
+ return res;
+}
+
+#ifdef DDEBUG
+/* This function is not in xt_qtaguid_print.c because of locks visibility */
+static void prdebug_full_state(int indent_level, const char *fmt, ...)
+{
+ va_list args;
+ char *fmt_buff;
+ char *buff;
+
+	if (likely(!(qtaguid_debug_mask & DDEBUG_MASK)))
+ return;
+
+ fmt_buff = kasprintf(GFP_ATOMIC,
+ "qtaguid: %s(): %s {\n", __func__, fmt);
+ BUG_ON(!fmt_buff);
+ va_start(args, fmt);
+ buff = kvasprintf(GFP_ATOMIC,
+ fmt_buff, args);
+ BUG_ON(!buff);
+ pr_debug("%s", buff);
+ kfree(fmt_buff);
+ kfree(buff);
+ va_end(args);
+
+ spin_lock_bh(&sock_tag_list_lock);
+ prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
+ spin_unlock_bh(&sock_tag_list_lock);
+
+ spin_lock_bh(&sock_tag_list_lock);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
+ prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ spin_unlock_bh(&sock_tag_list_lock);
+
+ spin_lock_bh(&iface_stat_list_lock);
+ prdebug_iface_stat_list(indent_level, &iface_stat_list);
+ spin_unlock_bh(&iface_stat_list_lock);
+
+ pr_debug("qtaguid: %s(): }\n", __func__);
+}
+#else
+static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
+#endif
+
+struct proc_ctrl_print_info {
+ struct sock *sk; /* socket found by reading to sk_pos */
+ loff_t sk_pos;
+};
+
+static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct proc_ctrl_print_info *pcpi = m->private;
+ struct sock_tag *sock_tag_entry = v;
+ struct rb_node *node;
+
+ (*pos)++;
+
+ if (!v || v == SEQ_START_TOKEN)
+ return NULL;
+
+ node = rb_next(&sock_tag_entry->sock_node);
+ if (!node) {
+ pcpi->sk = NULL;
+ sock_tag_entry = SEQ_START_TOKEN;
+ } else {
+ sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+ pcpi->sk = sock_tag_entry->sk;
+ }
+ pcpi->sk_pos = *pos;
+ return sock_tag_entry;
+}
+
+static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
+{
+ struct proc_ctrl_print_info *pcpi = m->private;
+ struct sock_tag *sock_tag_entry;
+ struct rb_node *node;
+
+ spin_lock_bh(&sock_tag_list_lock);
+
+ if (unlikely(module_passive))
+ return NULL;
+
+ if (*pos == 0) {
+ pcpi->sk_pos = 0;
+ node = rb_first(&sock_tag_tree);
+ if (!node) {
+ pcpi->sk = NULL;
+ return SEQ_START_TOKEN;
+ }
+ sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+ pcpi->sk = sock_tag_entry->sk;
+ } else {
+ sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
+ NULL) ?: SEQ_START_TOKEN;
+ if (*pos != pcpi->sk_pos) {
+ /* seq_read skipped a next call */
+ *pos = pcpi->sk_pos;
+ return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
+ }
+ }
+ return sock_tag_entry;
+}
+
+static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
+{
+ spin_unlock_bh(&sock_tag_list_lock);
+}
+
+/*
+ * Procfs reader to get all active socket tags using style "1)" as described in
+ * fs/proc/generic.c
+ */
+static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
+{
+ struct sock_tag *sock_tag_entry = v;
+ uid_t uid;
+ long f_count;
+
+ CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
+ current->pid, current->tgid, current_fsuid());
+
+ if (sock_tag_entry != SEQ_START_TOKEN) {
+ uid = get_uid_from_tag(sock_tag_entry->tag);
+ CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
+ "pid=%u\n",
+ sock_tag_entry->sk,
+ sock_tag_entry->tag,
+ uid,
+ sock_tag_entry->pid
+ );
+ f_count = atomic_long_read(
+ &sock_tag_entry->socket->file->f_count);
+ seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
+ "f_count=%lu\n",
+ sock_tag_entry->sk,
+ sock_tag_entry->tag, uid,
+ sock_tag_entry->pid, f_count);
+ } else {
+ seq_printf(m, "events: sockets_tagged=%llu "
+ "sockets_untagged=%llu "
+ "counter_set_changes=%llu "
+ "delete_cmds=%llu "
+ "iface_events=%llu "
+ "match_calls=%llu "
+ "match_calls_prepost=%llu "
+ "match_found_sk=%llu "
+ "match_found_sk_in_ct=%llu "
+ "match_found_no_sk_in_ct=%llu "
+ "match_no_sk=%llu "
+ "match_no_sk_file=%llu\n",
+ atomic64_read(&qtu_events.sockets_tagged),
+ atomic64_read(&qtu_events.sockets_untagged),
+ atomic64_read(&qtu_events.counter_set_changes),
+ atomic64_read(&qtu_events.delete_cmds),
+ atomic64_read(&qtu_events.iface_events),
+ atomic64_read(&qtu_events.match_calls),
+ atomic64_read(&qtu_events.match_calls_prepost),
+ atomic64_read(&qtu_events.match_found_sk),
+ atomic64_read(&qtu_events.match_found_sk_in_ct),
+ atomic64_read(&qtu_events.match_found_no_sk_in_ct),
+ atomic64_read(&qtu_events.match_no_sk),
+ atomic64_read(&qtu_events.match_no_sk_file));
+
+ /* Count the following as part of the last item_index */
+ prdebug_full_state(0, "proc ctrl");
+ }
+
+ return 0;
+}
+
+/*
+ * Delete socket tags, and stat tags associated with a given
+ * accounting tag and uid.
+ */
+static int ctrl_cmd_delete(const char *input)
+{
+ char cmd;
+ uid_t uid;
+ uid_t entry_uid;
+ tag_t acct_tag;
+ tag_t tag;
+ int res, argc;
+ struct iface_stat *iface_entry;
+ struct rb_node *node;
+ struct sock_tag *st_entry;
+ struct rb_root st_to_free_tree = RB_ROOT;
+ struct tag_stat *ts_entry;
+ struct tag_counter_set *tcs_entry;
+ struct tag_ref *tr_entry;
+ struct uid_tag_data *utd_entry;
+
+ argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
+ CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
+ "user_tag=0x%llx uid=%u\n", input, argc, cmd,
+ acct_tag, uid);
+ if (argc < 2) {
+ res = -EINVAL;
+ goto err;
+ }
+ if (!valid_atag(acct_tag)) {
+ pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
+ res = -EINVAL;
+ goto err;
+ }
+ if (argc < 3) {
+ uid = current_fsuid();
+ } else if (!can_impersonate_uid(uid)) {
+ pr_info("qtaguid: ctrl_delete(%s): "
+ "insufficient priv from pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
+ res = -EPERM;
+ goto err;
+ }
+
+ tag = combine_atag_with_uid(acct_tag, uid);
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "looking for tag=0x%llx (uid=%u)\n",
+ input, tag, uid);
+
+ /* Delete socket tags */
+ spin_lock_bh(&sock_tag_list_lock);
+ node = rb_first(&sock_tag_tree);
+ while (node) {
+ st_entry = rb_entry(node, struct sock_tag, sock_node);
+ entry_uid = get_uid_from_tag(st_entry->tag);
+ node = rb_next(node);
+ if (entry_uid != uid)
+ continue;
+
+ CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
+ input, st_entry->tag, entry_uid);
+
+ if (!acct_tag || st_entry->tag == tag) {
+ rb_erase(&st_entry->sock_node, &sock_tag_tree);
+ /* Can't sockfd_put() within spinlock, do it later. */
+ sock_tag_tree_insert(st_entry, &st_to_free_tree);
+ tr_entry = lookup_tag_ref(st_entry->tag, NULL);
+ BUG_ON(tr_entry->num_sock_tags <= 0);
+ tr_entry->num_sock_tags--;
+ /*
+ * TODO: remove if, and start failing.
+ * This is a hack to work around the fact that in some
+ * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
+			 * and are trying to work around apps
+			 * that never opened /dev/xt_qtaguid.
+ */
+ if (st_entry->list.next && st_entry->list.prev)
+ list_del(&st_entry->list);
+ }
+ }
+ spin_unlock_bh(&sock_tag_list_lock);
+
+ sock_tag_tree_erase(&st_to_free_tree);
+
+ /* Delete tag counter-sets */
+ spin_lock_bh(&tag_counter_set_list_lock);
+ /* Counter sets are only on the uid tag, not full tag */
+ tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
+ if (tcs_entry) {
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
+ input,
+ tcs_entry->tn.tag,
+ get_uid_from_tag(tcs_entry->tn.tag),
+ tcs_entry->active_set);
+ rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
+ kfree(tcs_entry);
+ }
+ spin_unlock_bh(&tag_counter_set_list_lock);
+
+ /*
+ * If acct_tag is 0, then all entries belonging to uid are
+ * erased.
+ */
+ spin_lock_bh(&iface_stat_list_lock);
+ list_for_each_entry(iface_entry, &iface_stat_list, list) {
+ spin_lock_bh(&iface_entry->tag_stat_list_lock);
+ node = rb_first(&iface_entry->tag_stat_tree);
+ while (node) {
+ ts_entry = rb_entry(node, struct tag_stat, tn.node);
+ entry_uid = get_uid_from_tag(ts_entry->tn.tag);
+ node = rb_next(node);
+
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "ts tag=0x%llx (uid=%u)\n",
+ input, ts_entry->tn.tag, entry_uid);
+
+ if (entry_uid != uid)
+ continue;
+ if (!acct_tag || ts_entry->tn.tag == tag) {
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "erase ts: %s 0x%llx %u\n",
+ input, iface_entry->ifname,
+ get_atag_from_tag(ts_entry->tn.tag),
+ entry_uid);
+ rb_erase(&ts_entry->tn.node,
+ &iface_entry->tag_stat_tree);
+ kfree(ts_entry);
+ }
+ }
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+ }
+ spin_unlock_bh(&iface_stat_list_lock);
+
+ /* Cleanup the uid_tag_data */
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ node = rb_first(&uid_tag_data_tree);
+ while (node) {
+ utd_entry = rb_entry(node, struct uid_tag_data, node);
+ entry_uid = utd_entry->uid;
+ node = rb_next(node);
+
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "utd uid=%u\n",
+ input, entry_uid);
+
+ if (entry_uid != uid)
+ continue;
+ /*
+ * Go over the tag_refs, and those that don't have
+ * sock_tags using them are freed.
+ */
+ put_tag_ref_tree(tag, utd_entry);
+ put_utd_entry(utd_entry);
+ }
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+
+ atomic64_inc(&qtu_events.delete_cmds);
+ res = 0;
+
+err:
+ return res;
+}
+
+static int ctrl_cmd_counter_set(const char *input)
+{
+ char cmd;
+ uid_t uid = 0;
+ tag_t tag;
+ int res, argc;
+ struct tag_counter_set *tcs;
+ int counter_set;
+
+ argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
+ CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
+ "set=%d uid=%u\n", input, argc, cmd,
+ counter_set, uid);
+ if (argc != 3) {
+ res = -EINVAL;
+ goto err;
+ }
+ if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
+ pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
+ input);
+ res = -EINVAL;
+ goto err;
+ }
+ if (!can_manipulate_uids()) {
+ pr_info("qtaguid: ctrl_counterset(%s): "
+ "insufficient priv from pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
+ res = -EPERM;
+ goto err;
+ }
+
+ tag = make_tag_from_uid(uid);
+ spin_lock_bh(&tag_counter_set_list_lock);
+ tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
+ if (!tcs) {
+ tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
+ if (!tcs) {
+ spin_unlock_bh(&tag_counter_set_list_lock);
+ pr_err("qtaguid: ctrl_counterset(%s): "
+ "failed to alloc counter set\n",
+ input);
+ res = -ENOMEM;
+ goto err;
+ }
+ tcs->tn.tag = tag;
+ tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
+ CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
+ "(uid=%u) set=%d\n",
+ input, tag, get_uid_from_tag(tag), counter_set);
+ }
+ tcs->active_set = counter_set;
+ spin_unlock_bh(&tag_counter_set_list_lock);
+ atomic64_inc(&qtu_events.counter_set_changes);
+ res = 0;
+
+err:
+ return res;
+}
+
+static int ctrl_cmd_tag(const char *input)
+{
+ char cmd;
+ int sock_fd = 0;
+ uid_t uid = 0;
+ tag_t acct_tag = make_atag_from_value(0);
+ tag_t full_tag;
+ struct socket *el_socket;
+ int res, argc;
+ struct sock_tag *sock_tag_entry;
+ struct tag_ref *tag_ref_entry;
+ struct uid_tag_data *uid_tag_data_entry;
+ struct proc_qtu_data *pqd_entry;
+
+ /* Unassigned args will get defaulted later. */
+ argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
+ CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
+ "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
+ acct_tag, uid);
+ if (argc < 2) {
+ res = -EINVAL;
+ goto err;
+ }
+ el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
+ if (!el_socket) {
+ pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
+ " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+ input, sock_fd, res, current->pid, current->tgid,
+ current_fsuid());
+ goto err;
+ }
+ CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
+ input, atomic_long_read(&el_socket->file->f_count),
+ el_socket->sk);
+ if (argc < 3) {
+ acct_tag = make_atag_from_value(0);
+ } else if (!valid_atag(acct_tag)) {
+ pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
+ res = -EINVAL;
+ goto err_put;
+ }
+ CT_DEBUG("qtaguid: ctrl_tag(%s): "
+ "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
+ "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
+ input, current->pid, current->tgid, current_uid(),
+ current_euid(), current_fsuid(),
+ xt_qtaguid_ctrl_file->gid,
+ in_group_p(xt_qtaguid_ctrl_file->gid),
+ in_egroup_p(xt_qtaguid_ctrl_file->gid));
+ if (argc < 4) {
+ uid = current_fsuid();
+ } else if (!can_impersonate_uid(uid)) {
+ pr_info("qtaguid: ctrl_tag(%s): "
+ "insufficient priv from pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
+ res = -EPERM;
+ goto err_put;
+ }
+ full_tag = combine_atag_with_uid(acct_tag, uid);
+
+ spin_lock_bh(&sock_tag_list_lock);
+ sock_tag_entry = get_sock_stat_nl(el_socket->sk);
+ tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
+ if (IS_ERR(tag_ref_entry)) {
+ res = PTR_ERR(tag_ref_entry);
+ spin_unlock_bh(&sock_tag_list_lock);
+ goto err_put;
+ }
+ tag_ref_entry->num_sock_tags++;
+ if (sock_tag_entry) {
+ struct tag_ref *prev_tag_ref_entry;
+
+ CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
+ "st@%p ...->f_count=%ld\n",
+ input, el_socket->sk, sock_tag_entry,
+ atomic_long_read(&el_socket->file->f_count));
+ /*
+ * This is a re-tagging, so release the sock_fd that was
+ * locked at the time of the 1st tagging.
+ * There is still the ref from this call's sockfd_lookup() so
+ * it can be done within the spinlock.
+ */
+ sockfd_put(sock_tag_entry->socket);
+ prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
+ &uid_tag_data_entry);
+ BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
+ BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
+ prev_tag_ref_entry->num_sock_tags--;
+ sock_tag_entry->tag = full_tag;
+ } else {
+ CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
+ input, el_socket->sk);
+ sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
+ GFP_ATOMIC);
+ if (!sock_tag_entry) {
+ pr_err("qtaguid: ctrl_tag(%s): "
+ "socket tag alloc failed\n",
+ input);
+ spin_unlock_bh(&sock_tag_list_lock);
+ res = -ENOMEM;
+ goto err_tag_unref_put;
+ }
+ sock_tag_entry->sk = el_socket->sk;
+ sock_tag_entry->socket = el_socket;
+ sock_tag_entry->pid = current->tgid;
+ sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
+ uid);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ pqd_entry = proc_qtu_data_tree_search(
+ &proc_qtu_data_tree, current->tgid);
+ /*
+ * TODO: remove if, and start failing.
+ * At first, we want to catch user-space code that is not
+		 * opening /dev/xt_qtaguid.
+ */
+ if (IS_ERR_OR_NULL(pqd_entry))
+ pr_warn_once(
+ "qtaguid: %s(): "
+ "User space forgot to open /dev/xt_qtaguid? "
+ "pid=%u tgid=%u uid=%u\n", __func__,
+ current->pid, current->tgid,
+ current_fsuid());
+ else
+ list_add(&sock_tag_entry->list,
+ &pqd_entry->sock_tag_list);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+
+ sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
+ atomic64_inc(&qtu_events.sockets_tagged);
+ }
+ spin_unlock_bh(&sock_tag_list_lock);
+ /* We keep the ref to the socket (file) until it is untagged */
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
+ input, sock_tag_entry,
+ atomic_long_read(&el_socket->file->f_count));
+ return 0;
+
+err_tag_unref_put:
+ BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+ tag_ref_entry->num_sock_tags--;
+ free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
+err_put:
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
+ input, atomic_long_read(&el_socket->file->f_count) - 1);
+ /* Release the sock_fd that was grabbed by sockfd_lookup(). */
+ sockfd_put(el_socket);
+ return res;
+
+err:
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
+ return res;
+}
+
+static int ctrl_cmd_untag(const char *input)
+{
+ char cmd;
+ int sock_fd = 0;
+ struct socket *el_socket;
+ int res, argc;
+ struct sock_tag *sock_tag_entry;
+ struct tag_ref *tag_ref_entry;
+ struct uid_tag_data *utd_entry;
+ struct proc_qtu_data *pqd_entry;
+
+ argc = sscanf(input, "%c %d", &cmd, &sock_fd);
+ CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
+ input, argc, cmd, sock_fd);
+ if (argc < 2) {
+ res = -EINVAL;
+ goto err;
+ }
+ el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
+ if (!el_socket) {
+ pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
+ " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+ input, sock_fd, res, current->pid, current->tgid,
+ current_fsuid());
+ goto err;
+ }
+ CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
+ input, atomic_long_read(&el_socket->file->f_count),
+ el_socket->sk);
+ spin_lock_bh(&sock_tag_list_lock);
+ sock_tag_entry = get_sock_stat_nl(el_socket->sk);
+ if (!sock_tag_entry) {
+ spin_unlock_bh(&sock_tag_list_lock);
+ res = -EINVAL;
+ goto err_put;
+ }
+ /*
+ * The socket already belongs to the current process
+ * so it can do whatever it wants to it.
+ */
+ rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
+
+ tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
+ BUG_ON(!tag_ref_entry);
+ BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ pqd_entry = proc_qtu_data_tree_search(
+ &proc_qtu_data_tree, current->tgid);
+ /*
+ * TODO: remove if, and start failing.
+ * At first, we want to catch user-space code that is not
+	 * opening /dev/xt_qtaguid.
+ */
+ if (IS_ERR_OR_NULL(pqd_entry))
+ pr_warn_once("qtaguid: %s(): "
+ "User space forgot to open /dev/xt_qtaguid? "
+ "pid=%u tgid=%u uid=%u\n", __func__,
+ current->pid, current->tgid, current_fsuid());
+ else
+ list_del(&sock_tag_entry->list);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ /*
+ * We don't free tag_ref from the utd_entry here,
+ * only during a cmd_delete().
+ */
+ tag_ref_entry->num_sock_tags--;
+ spin_unlock_bh(&sock_tag_list_lock);
+ /*
+ * Release the sock_fd that was grabbed at tag time,
+ * and once more for the sockfd_lookup() here.
+ */
+ sockfd_put(sock_tag_entry->socket);
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
+ input, sock_tag_entry,
+ atomic_long_read(&el_socket->file->f_count) - 1);
+ sockfd_put(el_socket);
+
+ kfree(sock_tag_entry);
+ atomic64_inc(&qtu_events.sockets_untagged);
+
+ return 0;
+
+err_put:
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
+ input, atomic_long_read(&el_socket->file->f_count) - 1);
+ /* Release the sock_fd that was grabbed by sockfd_lookup(). */
+ sockfd_put(el_socket);
+ return res;
+
+err:
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
+ return res;
+}
+
+static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
+{
+ char cmd;
+ ssize_t res;
+
+ CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
+
+ cmd = input[0];
+	/* Dispatch to the per-command handlers; each parses its own args */
+ switch (cmd) {
+ case 'd':
+ res = ctrl_cmd_delete(input);
+ break;
+
+ case 's':
+ res = ctrl_cmd_counter_set(input);
+ break;
+
+ case 't':
+ res = ctrl_cmd_tag(input);
+ break;
+
+ case 'u':
+ res = ctrl_cmd_untag(input);
+ break;
+
+ default:
+ res = -EINVAL;
+ goto err;
+ }
+ if (!res)
+ res = count;
+err:
+ CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
+ return res;
+}
+
+#define MAX_QTAGUID_CTRL_INPUT_LEN 255
+static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
+
+ if (unlikely(module_passive))
+ return count;
+
+ if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
+ return -EINVAL;
+
+ if (copy_from_user(input_buf, buffer, count))
+ return -EFAULT;
+
+ input_buf[count] = '\0';
+ return qtaguid_ctrl_parse(input_buf, count);
+}
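
Putting the write path together: userspace issues one command per write(), at most MAX_QTAGUID_CTRL_INPUT_LEN - 1 bytes, using the single-letter grammar parsed above ('t', 'u', 's', 'd'). A hypothetical userspace sketch; the proc path assumes the usual module_procdirname, and qtaguid_write()/tag_socket() are illustrative helpers, not part of this patch:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

static int qtaguid_write(const char *cmd)
{
	int fd = open("/proc/net/xt_qtaguid/ctrl", O_WRONLY);
	ssize_t len;

	if (fd < 0)
		return -1;
	len = write(fd, cmd, strlen(cmd));	/* one command per write() */
	close(fd);
	return len < 0 ? -1 : 0;
}

int tag_socket(int sock_fd, uint32_t acct_value, uid_t uid)
{
	char cmd[64];

	/* valid_atag() wants the low 32 bits clear, so the accounting
	 * value is shifted up, mirroring make_atag_from_value(). */
	snprintf(cmd, sizeof(cmd), "t %d %llu %u", sock_fd,
		 (unsigned long long)acct_value << 32, (unsigned)uid);
	return qtaguid_write(cmd);	/* "u %d" would untag it again */
}
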
+
+struct proc_print_info {
+ struct iface_stat *iface_entry;
+ int item_index;
+ tag_t tag; /* tag found by reading to tag_pos */
+ off_t tag_pos;
+ int tag_item_index;
+};
+
+static void pp_stats_header(struct seq_file *m)
+{
+ seq_puts(m,
+ "idx iface acct_tag_hex uid_tag_int cnt_set "
+ "rx_bytes rx_packets "
+ "tx_bytes tx_packets "
+ "rx_tcp_bytes rx_tcp_packets "
+ "rx_udp_bytes rx_udp_packets "
+ "rx_other_bytes rx_other_packets "
+ "tx_tcp_bytes tx_tcp_packets "
+ "tx_udp_bytes tx_udp_packets "
+ "tx_other_bytes tx_other_packets\n");
+}
+
+static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
+ int cnt_set)
+{
+ int ret;
+ struct data_counters *cnts;
+ tag_t tag = ts_entry->tn.tag;
+ uid_t stat_uid = get_uid_from_tag(tag);
+ struct proc_print_info *ppi = m->private;
+ /* Detailed tags are not available to everybody */
+ if (get_atag_from_tag(tag) && !can_read_other_uid_stats(stat_uid)) {
+ CT_DEBUG("qtaguid: stats line: "
+ "%s 0x%llx %u: insufficient priv "
+ "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
+ ppi->iface_entry->ifname,
+ get_atag_from_tag(tag), stat_uid,
+ current->pid, current->tgid, current_fsuid(),
+ xt_qtaguid_stats_file->gid);
+ return 0;
+ }
+ ppi->item_index++;
+ cnts = &ts_entry->counters;
+ ret = seq_printf(m, "%d %s 0x%llx %u %u "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu\n",
+ ppi->item_index,
+ ppi->iface_entry->ifname,
+ get_atag_from_tag(tag),
+ stat_uid,
+ cnt_set,
+ dc_sum_bytes(cnts, cnt_set, IFS_RX),
+ dc_sum_packets(cnts, cnt_set, IFS_RX),
+ dc_sum_bytes(cnts, cnt_set, IFS_TX),
+ dc_sum_packets(cnts, cnt_set, IFS_TX),
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
+ return ret ?: 1;
+}
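
For reference, one line of the resulting stats output could look like this (illustrative numbers; the four aggregate columns are the dc_sum_*() totals of the six per-protocol pairs that follow them):

2 wlan0 0x100000000 10003 0 5000 8 1500 12 4000 6 1000 2 0 0 1200 10 300 2 0 0
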
+
+static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
+{
+ int ret;
+ int counter_set;
+ for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
+ counter_set++) {
+ ret = pp_stats_line(m, ts_entry, counter_set);
+ if (ret < 0)
+ return false;
+ }
+ return true;
+}
+
+static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
+{
+ struct iface_stat *iface_entry;
+
+ if (!ptr)
+ return false;
+
+ list_for_each_entry(iface_entry, &iface_stat_list, list)
+ if (iface_entry == ptr)
+ return true;
+ return false;
+}
+
+static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
+{
+ spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
+ list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
+ spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
+ return;
+ }
+ ppi->iface_entry = NULL;
+}
+
+static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct proc_print_info *ppi = m->private;
+ struct tag_stat *ts_entry;
+ struct rb_node *node;
+
+ if (!v) {
+ pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
+ return NULL;
+ }
+
+ (*pos)++;
+
+ if (!ppi->iface_entry || unlikely(module_passive))
+ return NULL;
+
+ if (v == SEQ_START_TOKEN)
+ node = rb_first(&ppi->iface_entry->tag_stat_tree);
+ else
+ node = rb_next(&((struct tag_stat *)v)->tn.node);
+
+ while (!node) {
+ qtaguid_stats_proc_next_iface_entry(ppi);
+ if (!ppi->iface_entry)
+ return NULL;
+ node = rb_first(&ppi->iface_entry->tag_stat_tree);
+ }
+
+ ts_entry = rb_entry(node, struct tag_stat, tn.node);
+ ppi->tag = ts_entry->tn.tag;
+ ppi->tag_pos = *pos;
+ ppi->tag_item_index = ppi->item_index;
+ return ts_entry;
+}
+
+static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
+{
+ struct proc_print_info *ppi = m->private;
+ struct tag_stat *ts_entry = NULL;
+
+ spin_lock_bh(&iface_stat_list_lock);
+
+ if (*pos == 0) {
+ ppi->item_index = 1;
+ ppi->tag_pos = 0;
+ if (list_empty(&iface_stat_list)) {
+ ppi->iface_entry = NULL;
+ } else {
+ ppi->iface_entry = list_first_entry(&iface_stat_list,
+ struct iface_stat,
+ list);
+ spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
+ }
+ return SEQ_START_TOKEN;
+ }
+ if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
+ if (ppi->iface_entry) {
+ pr_err("qtaguid: %s(): iface_entry %p not found\n",
+ __func__, ppi->iface_entry);
+ ppi->iface_entry = NULL;
+ }
+ return NULL;
+ }
+
+ spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
+
+ if (!ppi->tag_pos) {
+ /* seq_read skipped first next call */
+ ts_entry = SEQ_START_TOKEN;
+ } else {
+ ts_entry = tag_stat_tree_search(
+ &ppi->iface_entry->tag_stat_tree, ppi->tag);
+ if (!ts_entry) {
+ pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
+ __func__, ppi->tag);
+ return NULL;
+ }
+ }
+
+ if (*pos == ppi->tag_pos) { /* normal resume */
+ ppi->item_index = ppi->tag_item_index;
+ } else {
+ /* seq_read skipped a next call */
+ *pos = ppi->tag_pos;
+ ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
+ }
+
+ return ts_entry;
+}
+
+static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
+{
+ struct proc_print_info *ppi = m->private;
+ if (ppi->iface_entry)
+ spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
+ spin_unlock_bh(&iface_stat_list_lock);
+}
+
+/*
+ * Procfs reader to get all tag stats using style "1)" as described in
+ * fs/proc/generic.c
+ * Groups all protocols tx/rx bytes.
+ */
+static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
+{
+ struct tag_stat *ts_entry = v;
+
+ if (v == SEQ_START_TOKEN)
+ pp_stats_header(m);
+ else
+ pp_sets(m, ts_entry);
+
+ return 0;
+}
+
+/*------------------------------------------*/
+static int qtudev_open(struct inode *inode, struct file *file)
+{
+ struct uid_tag_data *utd_entry;
+ struct proc_qtu_data *pqd_entry;
+ struct proc_qtu_data *new_pqd_entry;
+ int res;
+ bool utd_entry_found;
+
+ if (unlikely(qtu_proc_handling_passive))
+ return 0;
+
+ DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
+ current->pid, current->tgid, current_fsuid());
+
+ spin_lock_bh(&uid_tag_data_tree_lock);
+
+ /* Look for existing uid data, or alloc one. */
+ utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
+ if (IS_ERR_OR_NULL(utd_entry)) {
+ res = PTR_ERR(utd_entry);
+ goto err_unlock;
+ }
+
+ /* Look for existing PID based proc_data */
+ pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
+ current->tgid);
+ if (pqd_entry) {
+ pr_err("qtaguid: qtudev_open(): %u/%u %u "
+ "%s already opened\n",
+ current->pid, current->tgid, current_fsuid(),
+ QTU_DEV_NAME);
+ res = -EBUSY;
+ goto err_unlock_free_utd;
+ }
+
+ new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
+ if (!new_pqd_entry) {
+ pr_err("qtaguid: qtudev_open(): %u/%u %u: "
+ "proc data alloc failed\n",
+ current->pid, current->tgid, current_fsuid());
+ res = -ENOMEM;
+ goto err_unlock_free_utd;
+ }
+ new_pqd_entry->pid = current->tgid;
+ INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
+ new_pqd_entry->parent_tag_data = utd_entry;
+ utd_entry->num_pqd++;
+
+ proc_qtu_data_tree_insert(new_pqd_entry,
+ &proc_qtu_data_tree);
+
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
+ current_fsuid(), new_pqd_entry);
+ file->private_data = new_pqd_entry;
+ return 0;
+
+err_unlock_free_utd:
+ if (!utd_entry_found) {
+ rb_erase(&utd_entry->node, &uid_tag_data_tree);
+ kfree(utd_entry);
+ }
+err_unlock:
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ return res;
+}
+
+static int qtudev_release(struct inode *inode, struct file *file)
+{
+ struct proc_qtu_data *pqd_entry = file->private_data;
+ struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
+ struct sock_tag *st_entry;
+ struct rb_root st_to_free_tree = RB_ROOT;
+ struct list_head *entry, *next;
+ struct tag_ref *tr;
+
+ if (unlikely(qtu_proc_handling_passive))
+ return 0;
+
+ /*
+ * Do not trust the current->pid, it might just be a kworker cleaning
+ * up after a dead proc.
+ */
+ DR_DEBUG("qtaguid: qtudev_release(): "
+ "pid=%u tgid=%u uid=%u "
+ "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
+ current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
+ pqd_entry, pqd_entry->pid, utd_entry,
+ utd_entry->num_active_tags);
+
+ spin_lock_bh(&sock_tag_list_lock);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+
+ list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
+ st_entry = list_entry(entry, struct sock_tag, list);
+ DR_DEBUG("qtaguid: %s(): "
+ "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
+ __func__,
+ st_entry, st_entry->sk,
+ current->pid, current->tgid,
+ pqd_entry->parent_tag_data->uid);
+
+ utd_entry = uid_tag_data_tree_search(
+ &uid_tag_data_tree,
+ get_uid_from_tag(st_entry->tag));
+ BUG_ON(IS_ERR_OR_NULL(utd_entry));
+ DR_DEBUG("qtaguid: %s(): "
+ "looking for tag=0x%llx in utd_entry=%p\n", __func__,
+ st_entry->tag, utd_entry);
+ tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
+ st_entry->tag);
+ BUG_ON(!tr);
+ BUG_ON(tr->num_sock_tags <= 0);
+ tr->num_sock_tags--;
+ free_tag_ref_from_utd_entry(tr, utd_entry);
+
+ rb_erase(&st_entry->sock_node, &sock_tag_tree);
+ list_del(&st_entry->list);
+ /* Can't sockfd_put() within spinlock, do it later. */
+ sock_tag_tree_insert(st_entry, &st_to_free_tree);
+
+ /*
+ * Try to free the utd_entry if no other proc_qtu_data is
+ * using it (num_pqd is 0) and it doesn't have active tags
+ * (num_active_tags is 0).
+ */
+ put_utd_entry(utd_entry);
+ }
+
+ rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
+ BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
+ pqd_entry->parent_tag_data->num_pqd--;
+ put_utd_entry(pqd_entry->parent_tag_data);
+ kfree(pqd_entry);
+ file->private_data = NULL;
+
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ spin_unlock_bh(&sock_tag_list_lock);
+
+
+ sock_tag_tree_erase(&st_to_free_tree);
+
+ prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
+ current->pid, current->tgid);
+ return 0;
+}
+
+/*------------------------------------------*/
+static const struct file_operations qtudev_fops = {
+ .owner = THIS_MODULE,
+ .open = qtudev_open,
+ .release = qtudev_release,
+};
+
+static struct miscdevice qtu_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = QTU_DEV_NAME,
+ .fops = &qtudev_fops,
+ /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
+};
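
The tagging paths warn when a process never opened /dev/xt_qtaguid because this open/release pairing is what lets the module garbage-collect the tags of a dead process. A hypothetical userspace sketch of the expected lifecycle:

#include <fcntl.h>
#include <unistd.h>

static int qtu_fd = -1;		/* held open for the life of the process */

int qtaguid_attach(void)
{
	qtu_fd = open("/dev/xt_qtaguid", O_RDONLY);
	return qtu_fd < 0 ? -1 : 0;
}

/* Tag/untag sockets via the ctrl file while qtu_fd stays open;
 * closing it (or dying) runs qtudev_release(), which untags any
 * sockets the process left behind. */
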
+
+static const struct seq_operations proc_qtaguid_ctrl_seqops = {
+ .start = qtaguid_ctrl_proc_start,
+ .next = qtaguid_ctrl_proc_next,
+ .stop = qtaguid_ctrl_proc_stop,
+ .show = qtaguid_ctrl_proc_show,
+};
+
+static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
+{
+ return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
+ sizeof(struct proc_ctrl_print_info));
+}
+
+static const struct file_operations proc_qtaguid_ctrl_fops = {
+ .open = proc_qtaguid_ctrl_open,
+ .read = seq_read,
+ .write = qtaguid_ctrl_proc_write,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+static const struct seq_operations proc_qtaguid_stats_seqops = {
+ .start = qtaguid_stats_proc_start,
+ .next = qtaguid_stats_proc_next,
+ .stop = qtaguid_stats_proc_stop,
+ .show = qtaguid_stats_proc_show,
+};
+
+static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
+{
+ return seq_open_private(file, &proc_qtaguid_stats_seqops,
+ sizeof(struct proc_print_info));
+}
+
+static const struct file_operations proc_qtaguid_stats_fops = {
+ .open = proc_qtaguid_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/*------------------------------------------*/
+static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
+{
+ int ret;
+ *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
+ if (!*res_procdir) {
+ pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
+ ret = -ENOMEM;
+ goto no_dir;
+ }
+
+ xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
+ *res_procdir,
+ &proc_qtaguid_ctrl_fops,
+ NULL);
+ if (!xt_qtaguid_ctrl_file) {
+		pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
+		       "file\n");
+ ret = -ENOMEM;
+ goto no_ctrl_entry;
+ }
+
+ xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
+ *res_procdir,
+ &proc_qtaguid_stats_fops,
+ NULL);
+ if (!xt_qtaguid_stats_file) {
+ pr_err("qtaguid: failed to create xt_qtaguid/stats "
+ "file\n");
+ ret = -ENOMEM;
+ goto no_stats_entry;
+ }
+ /*
+	 * TODO: add support for counter hacking
+ * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
+ */
+ return 0;
+
+no_stats_entry:
+ remove_proc_entry("ctrl", *res_procdir);
+no_ctrl_entry:
+	remove_proc_entry(module_procdirname, init_net.proc_net);
+no_dir:
+ return ret;
+}
+
+static struct xt_match qtaguid_mt_reg __read_mostly = {
+ /*
+ * This module masquerades as the "owner" module so that iptables
+ * tools can deal with it.
+ */
+ .name = "owner",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .match = qtaguid_mt,
+ .matchsize = sizeof(struct xt_qtaguid_match_info),
+ .me = THIS_MODULE,
+};
+
+static int __init qtaguid_mt_init(void)
+{
+ if (qtaguid_proc_register(&xt_qtaguid_procdir)
+ || iface_stat_init(xt_qtaguid_procdir)
+ || xt_register_match(&qtaguid_mt_reg)
+ || misc_register(&qtu_device))
+ return -1;
+ return 0;
+}
+
+/*
+ * TODO: allow unloading of the module.
+ * For now stats are permanent.
+ * Kconfig forces 'y/n' and never an 'm'.
+ */
+
+module_init(qtaguid_mt_init);
+MODULE_AUTHOR("jpa <jpa@google.com>");
+MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_owner");
+MODULE_ALIAS("ip6t_owner");
+MODULE_ALIAS("ipt_qtaguid");
+MODULE_ALIAS("ip6t_qtaguid");
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h
new file mode 100644
index 000000000000..6dc14a9c6889
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_internal.h
@@ -0,0 +1,352 @@
+/*
+ * Kernel iptables module to track stats for packets based on user tags.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __XT_QTAGUID_INTERNAL_H__
+#define __XT_QTAGUID_INTERNAL_H__
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock_types.h>
+#include <linux/workqueue.h>
+
+/* Iface handling */
+#define IDEBUG_MASK (1<<0)
+/* Iptable Matching. Per packet. */
+#define MDEBUG_MASK (1<<1)
+/* Red-black tree handling. Per packet. */
+#define RDEBUG_MASK (1<<2)
+/* procfs ctrl/stats handling */
+#define CDEBUG_MASK (1<<3)
+/* dev and resource tracking */
+#define DDEBUG_MASK (1<<4)
+
+/* E.g. (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
+#define DEFAULT_DEBUG_MASK 0
+
+/*
+ * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
+ * All undef: text size ~ 0x3030; all def: ~ 0x4404.
+ */
+#define IDEBUG
+#define MDEBUG
+#define RDEBUG
+#define CDEBUG
+#define DDEBUG
+
+#define MSK_DEBUG(mask, ...) do { \
+ if (unlikely(qtaguid_debug_mask & (mask))) \
+ pr_debug(__VA_ARGS__); \
+ } while (0)
+#ifdef IDEBUG
+#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
+#else
+#define IF_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef MDEBUG
+#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
+#else
+#define MT_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef RDEBUG
+#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
+#else
+#define RB_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef CDEBUG
+#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
+#else
+#define CT_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef DDEBUG
+#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
+#else
+#define DR_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+
+extern uint qtaguid_debug_mask;
+
+/*---------------------------------------------------------------------------*/
+/*
+ * Tags:
+ *
+ * They represent what the data usage counters will be tracked against.
+ * By default a tag is just based on the UID.
+ * The UID is used as the base for policing, and can not be ignored.
+ * So a tag will always at least represent a UID (uid_tag).
+ *
+ * A tag can be augmented with an "accounting tag" which is associated
+ * with a UID.
+ * User space can set the acct_tag portion of the tag which is then used
+ * with sockets: all data belonging to that socket will be counted against the
+ * tag. The policing is then based on the tag's uid_tag portion,
+ * and stats are collected for the acct_tag portion separately.
+ *
+ * There could be
+ * a: {acct_tag=1, uid_tag=10003}
+ * b: {acct_tag=2, uid_tag=10003}
+ * c: {acct_tag=3, uid_tag=10003}
+ * d: {acct_tag=0, uid_tag=10003}
+ * a, b, and c represent tags associated with specific sockets.
+ * d is for the totals for that uid, including all untagged traffic.
+ * Typically d is used with policing/quota rules.
+ *
+ * We want tag_t big enough to distinguish uid_t and acct_tag.
+ * It might become a struct if needed.
+ * Nothing should be using it as an int.
+ */
+typedef uint64_t tag_t; /* Only used via accessors */
+
+#define TAG_UID_MASK 0xFFFFFFFFULL
+#define TAG_ACCT_MASK (~0xFFFFFFFFULL)
+
+static inline int tag_compare(tag_t t1, tag_t t2)
+{
+ return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
+}
+
+static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
+{
+ return acct_tag | uid;
+}
+static inline tag_t make_tag_from_uid(uid_t uid)
+{
+ return uid;
+}
+static inline uid_t get_uid_from_tag(tag_t tag)
+{
+ return tag & TAG_UID_MASK;
+}
+static inline tag_t get_utag_from_tag(tag_t tag)
+{
+ return tag & TAG_UID_MASK;
+}
+static inline tag_t get_atag_from_tag(tag_t tag)
+{
+ return tag & TAG_ACCT_MASK;
+}
+
+static inline bool valid_atag(tag_t tag)
+{
+ return !(tag & TAG_UID_MASK);
+}
+static inline tag_t make_atag_from_value(uint32_t value)
+{
+ return (uint64_t)value << 32;
+}
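
A quick worked example of the layout these accessors implement (uid 10003 is 0x2713; the values are illustrative):

	tag_t atag = make_atag_from_value(1);		/* 0x0000000100000000 */
	tag_t tag = combine_atag_with_uid(atag, 10003);	/* 0x0000000100002713 */
	/* get_uid_from_tag(tag)  -> 10003
	 * get_atag_from_tag(tag) -> 0x0000000100000000
	 * valid_atag(atag)       -> true, since its low 32 bits are zero */
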
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Maximum number of socket tags that a UID is allowed to have active.
+ * Multiple processes belonging to the same UID contribute towards this limit.
+ * Special UIDs that can impersonate a UID also contribute (e.g. download
+ * manager, ...)
+ */
+#define DEFAULT_MAX_SOCK_TAGS 1024
+
+/*
+ * For now we only track 2 sets of counters.
+ * The default set is 0.
+ * Userspace can activate another set for a given uid being tracked.
+ */
+#define IFS_MAX_COUNTER_SETS 2
+
+enum ifs_tx_rx {
+ IFS_TX,
+ IFS_RX,
+ IFS_MAX_DIRECTIONS
+};
+
+/* For now, TCP, UDP, the rest */
+enum ifs_proto {
+ IFS_TCP,
+ IFS_UDP,
+ IFS_PROTO_OTHER,
+ IFS_MAX_PROTOS
+};
+
+struct byte_packet_counters {
+ uint64_t bytes;
+ uint64_t packets;
+};
+
+struct data_counters {
+ struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
+};
+
+static inline uint64_t dc_sum_bytes(struct data_counters *counters,
+ int set,
+ enum ifs_tx_rx direction)
+{
+ return counters->bpc[set][direction][IFS_TCP].bytes
+ + counters->bpc[set][direction][IFS_UDP].bytes
+ + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
+}
+
+static inline uint64_t dc_sum_packets(struct data_counters *counters,
+ int set,
+ enum ifs_tx_rx direction)
+{
+ return counters->bpc[set][direction][IFS_TCP].packets
+ + counters->bpc[set][direction][IFS_UDP].packets
+ + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
+}
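
These helpers produce the aggregate rx/tx columns of the stats output; for one counter set and direction (cnts and cnt_set as in pp_stats_line() earlier in this patch):

	/* rx_bytes is just the sum of the three protocol buckets: */
	uint64_t rx_bytes = dc_sum_bytes(cnts, cnt_set, IFS_RX);
	/* == bpc[cnt_set][IFS_RX][IFS_TCP].bytes
	 *  + bpc[cnt_set][IFS_RX][IFS_UDP].bytes
	 *  + bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes */
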
+
+
+/* Generic tag-based node used as a base for rb_tree ops */
+struct tag_node {
+ struct rb_node node;
+ tag_t tag;
+};
+
+struct tag_stat {
+ struct tag_node tn;
+ struct data_counters counters;
+ /*
+ * If this tag is acct_tag based, we need to count against the
+ * matching parent uid_tag.
+ */
+ struct data_counters *parent_counters;
+};
+
+struct iface_stat {
+ struct list_head list; /* in iface_stat_list */
+ char *ifname;
+ bool active;
+ /* net_dev is only valid for active iface_stat */
+ struct net_device *net_dev;
+
+ struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
+ struct data_counters totals_via_skb;
+ /*
+ * We keep the last_known, because some devices reset their counters
+ * just before NETDEV_UP, while some will reset just before
+ * NETDEV_REGISTER (which is more normal).
+ * So now, if the device didn't do a NETDEV_UNREGISTER and we see
+	 * its current dev stats smaller than what was previously known, we
+ * assume an UNREGISTER and just use the last_known.
+ */
+ struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
+ /* last_known is usable when last_known_valid is true */
+ bool last_known_valid;
+
+ struct proc_dir_entry *proc_ptr;
+
+ struct rb_root tag_stat_tree;
+ spinlock_t tag_stat_list_lock;
+};
+
+/* This is needed to create proc_dir_entries from atomic context. */
+struct iface_stat_work {
+ struct work_struct iface_work;
+ struct iface_stat *iface_entry;
+};
+
+/*
+ * Track the tag that this socket is transferring data for, not necessarily
+ * the uid that owns the socket.
+ * This is the tag against which tag_stat.counters will be billed.
+ * These structs need to be looked up by sock and pid.
+ */
+struct sock_tag {
+ struct rb_node sock_node;
+ struct sock *sk; /* Only used as a number, never dereferenced */
+ /* The socket is needed for sockfd_put() */
+ struct socket *socket;
+ /* Used to associate with a given pid */
+ struct list_head list; /* in proc_qtu_data.sock_tag_list */
+ pid_t pid;
+
+ tag_t tag;
+};
+
+struct qtaguid_event_counts {
+ /* Various successful events */
+ atomic64_t sockets_tagged;
+ atomic64_t sockets_untagged;
+ atomic64_t counter_set_changes;
+ atomic64_t delete_cmds;
+ atomic64_t iface_events; /* Number of NETDEV_* events handled */
+
+ atomic64_t match_calls; /* Number of times iptables called mt */
+ /* Number of times iptables called mt from pre or post routing hooks */
+ atomic64_t match_calls_prepost;
+ /*
+ * match_found_sk_*: numbers related to the netfilter matching
+ * function finding a sock for the sk_buff.
+ * Total skbs processed is sum(match_found*).
+ */
+ atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
+ /* The connection tracker had or didn't have the sk. */
+ atomic64_t match_found_sk_in_ct;
+ atomic64_t match_found_no_sk_in_ct;
+ /*
+ * No sk could be found. No apparent owner. Could happen with
+ * unsolicited traffic.
+ */
+ atomic64_t match_no_sk;
+ /*
+ * The file ptr in the sk_socket wasn't there.
+ * This might happen for traffic while the socket is being closed.
+ */
+ atomic64_t match_no_sk_file;
+};
+
+/* Track the set active_set for the given tag. */
+struct tag_counter_set {
+ struct tag_node tn;
+ int active_set;
+};
+
+/*----------------------------------------------*/
+/*
+ * The qtu uid data is used to track resources that are created directly or
+ * indirectly by processes (uid tracked).
+ * It is shared by the processes with the same uid.
+ * Some of the resources are counted to prevent further rogue allocations,
+ * some will need freeing once the owner process (uid) exits.
+ */
+struct uid_tag_data {
+ struct rb_node node;
+ uid_t uid;
+
+ /*
+ * For the uid, how many accounting tags have been set.
+ */
+ int num_active_tags;
+ /* Track the number of proc_qtu_data that reference it */
+ int num_pqd;
+ struct rb_root tag_ref_tree;
+ /* No tag_node_tree_lock; use uid_tag_data_tree_lock */
+};
+
+struct tag_ref {
+ struct tag_node tn;
+
+ /*
+ * This tracks the number of active sockets that have a tag on them
+ * which matches this tag_ref.tn.tag.
+ * A tag ref can live on after the sockets are untagged.
+ * A tag ref can only be removed during a tag delete command.
+ */
+ int num_sock_tags;
+};
+
+struct proc_qtu_data {
+ struct rb_node node;
+ pid_t pid;
+
+ struct uid_tag_data *parent_tag_data;
+
+ /* Tracks the sock_tags that need freeing upon this proc's death */
+ struct list_head sock_tag_list;
+ /* No spinlock_t sock_tag_list_lock; use the global one. */
+};
+
+/*----------------------------------------------*/
+#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c
new file mode 100644
index 000000000000..f6a00a3520ed
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.c
@@ -0,0 +1,566 @@
+/*
+ * Pretty-printing support for the iptables xt_qtaguid module.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Most of the functions in this file just waste time if DEBUG is not defined.
+ * The matching xt_qtaguid_print.h provides empty static inline funcs if the
+ * needed debug flags are not defined.
+ * Those funcs that fail to allocate memory will panic as there is no need to
+ * hobble along just pretending to do the requested work.
+ */
+
+#define DEBUG
+
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/net.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/spinlock_types.h>
+
+
+#include "xt_qtaguid_internal.h"
+#include "xt_qtaguid_print.h"
+
+#ifdef DDEBUG
+
+static void _bug_on_err_or_null(void *ptr)
+{
+ if (IS_ERR_OR_NULL(ptr)) {
+ pr_err("qtaguid: kmalloc failed\n");
+ BUG();
+ }
+}
+
+char *pp_tag_t(tag_t *tag)
+{
+ char *res;
+
+ if (!tag)
+ res = kasprintf(GFP_ATOMIC, "tag_t@null{}");
+ else
+ res = kasprintf(GFP_ATOMIC,
+ "tag_t@%p{tag=0x%llx, uid=%u}",
+ tag, *tag, get_uid_from_tag(*tag));
+ _bug_on_err_or_null(res);
+ return res;
+}
+
+char *pp_data_counters(struct data_counters *dc, bool showValues)
+{
+ char *res;
+
+ if (!dc)
+ res = kasprintf(GFP_ATOMIC, "data_counters@null{}");
+ else if (showValues)
+ res = kasprintf(
+ GFP_ATOMIC, "data_counters@%p{"
+ "set0{"
+ "rx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}, "
+ "tx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}}, "
+ "set1{"
+ "rx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}, "
+ "tx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}}}",
+ dc,
+ dc->bpc[0][IFS_RX][IFS_TCP].bytes,
+ dc->bpc[0][IFS_RX][IFS_TCP].packets,
+ dc->bpc[0][IFS_RX][IFS_UDP].bytes,
+ dc->bpc[0][IFS_RX][IFS_UDP].packets,
+ dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets,
+ dc->bpc[0][IFS_TX][IFS_TCP].bytes,
+ dc->bpc[0][IFS_TX][IFS_TCP].packets,
+ dc->bpc[0][IFS_TX][IFS_UDP].bytes,
+ dc->bpc[0][IFS_TX][IFS_UDP].packets,
+ dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets,
+ dc->bpc[1][IFS_RX][IFS_TCP].bytes,
+ dc->bpc[1][IFS_RX][IFS_TCP].packets,
+ dc->bpc[1][IFS_RX][IFS_UDP].bytes,
+ dc->bpc[1][IFS_RX][IFS_UDP].packets,
+ dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets,
+ dc->bpc[1][IFS_TX][IFS_TCP].bytes,
+ dc->bpc[1][IFS_TX][IFS_TCP].packets,
+ dc->bpc[1][IFS_TX][IFS_UDP].bytes,
+ dc->bpc[1][IFS_TX][IFS_UDP].packets,
+ dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets);
+ else
+ res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc);
+ _bug_on_err_or_null(res);
+ return res;
+}
+
+char *pp_tag_node(struct tag_node *tn)
+{
+ char *tag_str;
+ char *res;
+
+ if (!tn) {
+ res = kasprintf(GFP_ATOMIC, "tag_node@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ tag_str = pp_tag_t(&tn->tag);
+ res = kasprintf(GFP_ATOMIC,
+ "tag_node@%p{tag=%s}",
+ tn, tag_str);
+ _bug_on_err_or_null(res);
+ kfree(tag_str);
+ return res;
+}
+
+char *pp_tag_ref(struct tag_ref *tr)
+{
+ char *tn_str;
+ char *res;
+
+ if (!tr) {
+ res = kasprintf(GFP_ATOMIC, "tag_ref@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ tn_str = pp_tag_node(&tr->tn);
+ res = kasprintf(GFP_ATOMIC,
+ "tag_ref@%p{%s, num_sock_tags=%d}",
+ tr, tn_str, tr->num_sock_tags);
+ _bug_on_err_or_null(res);
+ kfree(tn_str);
+ return res;
+}
+
+char *pp_tag_stat(struct tag_stat *ts)
+{
+ char *tn_str;
+ char *counters_str;
+ char *parent_counters_str;
+ char *res;
+
+ if (!ts) {
+ res = kasprintf(GFP_ATOMIC, "tag_stat@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ tn_str = pp_tag_node(&ts->tn);
+ counters_str = pp_data_counters(&ts->counters, true);
+ parent_counters_str = pp_data_counters(ts->parent_counters, false);
+ res = kasprintf(GFP_ATOMIC,
+ "tag_stat@%p{%s, counters=%s, parent_counters=%s}",
+ ts, tn_str, counters_str, parent_counters_str);
+ _bug_on_err_or_null(res);
+ kfree(tn_str);
+ kfree(counters_str);
+ kfree(parent_counters_str);
+ return res;
+}
+
+char *pp_iface_stat(struct iface_stat *is)
+{
+ char *res;
+ if (!is) {
+ res = kasprintf(GFP_ATOMIC, "iface_stat@null{}");
+ } else {
+ struct data_counters *cnts = &is->totals_via_skb;
+ res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
+ "list=list_head{...}, "
+ "ifname=%s, "
+ "total_dev={rx={bytes=%llu, "
+ "packets=%llu}, "
+ "tx={bytes=%llu, "
+ "packets=%llu}}, "
+ "total_skb={rx={bytes=%llu, "
+ "packets=%llu}, "
+ "tx={bytes=%llu, "
+ "packets=%llu}}, "
+ "last_known_valid=%d, "
+ "last_known={rx={bytes=%llu, "
+ "packets=%llu}, "
+ "tx={bytes=%llu, "
+ "packets=%llu}}, "
+ "active=%d, "
+ "net_dev=%p, "
+ "proc_ptr=%p, "
+ "tag_stat_tree=rb_root{...}}",
+ is,
+ is->ifname,
+ is->totals_via_dev[IFS_RX].bytes,
+ is->totals_via_dev[IFS_RX].packets,
+ is->totals_via_dev[IFS_TX].bytes,
+ is->totals_via_dev[IFS_TX].packets,
+ dc_sum_bytes(cnts, 0, IFS_RX),
+ dc_sum_packets(cnts, 0, IFS_RX),
+ dc_sum_bytes(cnts, 0, IFS_TX),
+ dc_sum_packets(cnts, 0, IFS_TX),
+ is->last_known_valid,
+ is->last_known[IFS_RX].bytes,
+ is->last_known[IFS_RX].packets,
+ is->last_known[IFS_TX].bytes,
+ is->last_known[IFS_TX].packets,
+ is->active,
+ is->net_dev,
+ is->proc_ptr);
+ }
+ _bug_on_err_or_null(res);
+ return res;
+}
+
+char *pp_sock_tag(struct sock_tag *st)
+{
+ char *tag_str;
+ char *res;
+
+ if (!st) {
+ res = kasprintf(GFP_ATOMIC, "sock_tag@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ tag_str = pp_tag_t(&st->tag);
+ res = kasprintf(GFP_ATOMIC, "sock_tag@%p{"
+ "sock_node=rb_node{...}, "
+ "sk=%p socket=%p (f_count=%lu), list=list_head{...}, "
+ "pid=%u, tag=%s}",
+ st, st->sk, st->socket, atomic_long_read(
+ &st->socket->file->f_count),
+ st->pid, tag_str);
+ _bug_on_err_or_null(res);
+ kfree(tag_str);
+ return res;
+}
+
+char *pp_uid_tag_data(struct uid_tag_data *utd)
+{
+ char *res;
+
+ if (!utd)
+ res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}");
+ else
+ res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{"
+ "uid=%u, num_active_acct_tags=%d, "
+ "num_pqd=%d, "
+ "tag_node_tree=rb_root{...}, "
+ "proc_qtu_data_tree=rb_root{...}}",
+ utd, utd->uid,
+ utd->num_active_tags, utd->num_pqd);
+ _bug_on_err_or_null(res);
+ return res;
+}
+
+char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
+{
+ char *parent_tag_data_str;
+ char *res;
+
+ if (!pqd) {
+ res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data);
+ res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{"
+ "node=rb_node{...}, pid=%u, "
+ "parent_tag_data=%s, "
+ "sock_tag_list=list_head{...}}",
+ pqd, pqd->pid, parent_tag_data_str
+ );
+ _bug_on_err_or_null(res);
+ kfree(parent_tag_data_str);
+ return res;
+}
+
+/*------------------------------------------*/
+void prdebug_sock_tag_tree(int indent_level,
+ struct rb_root *sock_tag_tree)
+{
+ struct rb_node *node;
+ struct sock_tag *sock_tag_entry;
+ char *str;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(sock_tag_tree)) {
+ str = "sock_tag_tree=rb_root{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "sock_tag_tree=rb_root{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(sock_tag_tree);
+ node;
+ node = rb_next(node)) {
+ sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+ str = pp_sock_tag(sock_tag_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_sock_tag_list(int indent_level,
+ struct list_head *sock_tag_list)
+{
+ struct sock_tag *sock_tag_entry;
+ char *str;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (list_empty(sock_tag_list)) {
+ str = "sock_tag_list=list_head{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "sock_tag_list=list_head{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ list_for_each_entry(sock_tag_entry, sock_tag_list, list) {
+ str = pp_sock_tag(sock_tag_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_proc_qtu_data_tree(int indent_level,
+ struct rb_root *proc_qtu_data_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct proc_qtu_data *proc_qtu_data_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(proc_qtu_data_tree)) {
+ str = "proc_qtu_data_tree=rb_root{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "proc_qtu_data_tree=rb_root{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(proc_qtu_data_tree);
+ node;
+ node = rb_next(node)) {
+ proc_qtu_data_entry = rb_entry(node,
+ struct proc_qtu_data,
+ node);
+ str = pp_proc_qtu_data(proc_qtu_data_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level,
+ str);
+ kfree(str);
+ indent_level++;
+ prdebug_sock_tag_list(indent_level,
+ &proc_qtu_data_entry->sock_tag_list);
+ indent_level--;
+
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct tag_ref *tag_ref_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(tag_ref_tree)) {
+ str = "tag_ref_tree{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "tag_ref_tree{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(tag_ref_tree);
+ node;
+ node = rb_next(node)) {
+ tag_ref_entry = rb_entry(node,
+ struct tag_ref,
+ tn.node);
+ str = pp_tag_ref(tag_ref_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level,
+ str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_uid_tag_data_tree(int indent_level,
+ struct rb_root *uid_tag_data_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct uid_tag_data *uid_tag_data_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(uid_tag_data_tree)) {
+ str = "uid_tag_data_tree=rb_root{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "uid_tag_data_tree=rb_root{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(uid_tag_data_tree);
+ node;
+ node = rb_next(node)) {
+ uid_tag_data_entry = rb_entry(node, struct uid_tag_data,
+ node);
+ str = pp_uid_tag_data(uid_tag_data_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
+ kfree(str);
+ if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) {
+ indent_level++;
+ prdebug_tag_ref_tree(indent_level,
+ &uid_tag_data_entry->tag_ref_tree);
+ indent_level--;
+ }
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_tag_stat_tree(int indent_level,
+ struct rb_root *tag_stat_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct tag_stat *ts_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(tag_stat_tree)) {
+ str = "tag_stat_tree{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "tag_stat_tree{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(tag_stat_tree);
+ node;
+ node = rb_next(node)) {
+ ts_entry = rb_entry(node, struct tag_stat, tn.node);
+ str = pp_tag_stat(ts_entry);
+ pr_debug("%*d: %s\n", indent_level*2, indent_level,
+ str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_iface_stat_list(int indent_level,
+ struct list_head *iface_stat_list)
+{
+ char *str;
+ struct iface_stat *iface_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (list_empty(iface_stat_list)) {
+ str = "iface_stat_list=list_head{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "iface_stat_list=list_head{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ list_for_each_entry(iface_entry, iface_stat_list, list) {
+ str = pp_iface_stat(iface_entry);
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ kfree(str);
+
+ spin_lock_bh(&iface_entry->tag_stat_list_lock);
+ if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) {
+ indent_level++;
+ prdebug_tag_stat_tree(indent_level,
+ &iface_entry->tag_stat_tree);
+ indent_level--;
+ }
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+#endif /* ifdef DDEBUG */
+/*------------------------------------------*/
+static const char * const netdev_event_strings[] = {
+ "netdev_unknown",
+ "NETDEV_UP",
+ "NETDEV_DOWN",
+ "NETDEV_REBOOT",
+ "NETDEV_CHANGE",
+ "NETDEV_REGISTER",
+ "NETDEV_UNREGISTER",
+ "NETDEV_CHANGEMTU",
+ "NETDEV_CHANGEADDR",
+ "NETDEV_GOING_DOWN",
+ "NETDEV_CHANGENAME",
+ "NETDEV_FEAT_CHANGE",
+ "NETDEV_BONDING_FAILOVER",
+ "NETDEV_PRE_UP",
+ "NETDEV_PRE_TYPE_CHANGE",
+ "NETDEV_POST_TYPE_CHANGE",
+ "NETDEV_POST_INIT",
+ "NETDEV_UNREGISTER_BATCH",
+ "NETDEV_RELEASE",
+ "NETDEV_NOTIFY_PEERS",
+ "NETDEV_JOIN",
+};
+
+const char *netdev_evt_str(int netdev_event)
+{
+ if (netdev_event < 0
+ || netdev_event >= ARRAY_SIZE(netdev_event_strings))
+ return "bad event num";
+ return netdev_event_strings[netdev_event];
+}
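
netdev_evt_str() is intended for the netdev notifier path; a minimal hypothetical caller (my_netdev_event/my_netdev_nb are illustrative names):

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	pr_debug("qtaguid: netdev event %s\n", netdev_evt_str((int)event));
	return NOTIFY_DONE;
}

static struct notifier_block my_netdev_nb = {
	.notifier_call = my_netdev_event,
};
/* Registered with register_netdevice_notifier(&my_netdev_nb) at init. */
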
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h
new file mode 100644
index 000000000000..b63871a0be5a
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.h
@@ -0,0 +1,120 @@
+/*
+ * Pretty-printing support for the iptables xt_qtaguid module.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __XT_QTAGUID_PRINT_H__
+#define __XT_QTAGUID_PRINT_H__
+
+#include "xt_qtaguid_internal.h"
+
+#ifdef DDEBUG
+
+char *pp_tag_t(tag_t *tag);
+char *pp_data_counters(struct data_counters *dc, bool showValues);
+char *pp_tag_node(struct tag_node *tn);
+char *pp_tag_ref(struct tag_ref *tr);
+char *pp_tag_stat(struct tag_stat *ts);
+char *pp_iface_stat(struct iface_stat *is);
+char *pp_sock_tag(struct sock_tag *st);
+char *pp_uid_tag_data(struct uid_tag_data *qtd);
+char *pp_proc_qtu_data(struct proc_qtu_data *pqd);
+
+/*------------------------------------------*/
+void prdebug_sock_tag_list(int indent_level,
+ struct list_head *sock_tag_list);
+void prdebug_sock_tag_tree(int indent_level,
+ struct rb_root *sock_tag_tree);
+void prdebug_proc_qtu_data_tree(int indent_level,
+ struct rb_root *proc_qtu_data_tree);
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree);
+void prdebug_uid_tag_data_tree(int indent_level,
+ struct rb_root *uid_tag_data_tree);
+void prdebug_tag_stat_tree(int indent_level,
+ struct rb_root *tag_stat_tree);
+void prdebug_iface_stat_list(int indent_level,
+ struct list_head *iface_stat_list);
+
+#else
+
+/*------------------------------------------*/
+static inline char *pp_tag_t(tag_t *tag)
+{
+ return NULL;
+}
+static inline char *pp_data_counters(struct data_counters *dc, bool showValues)
+{
+ return NULL;
+}
+static inline char *pp_tag_node(struct tag_node *tn)
+{
+ return NULL;
+}
+static inline char *pp_tag_ref(struct tag_ref *tr)
+{
+ return NULL;
+}
+static inline char *pp_tag_stat(struct tag_stat *ts)
+{
+ return NULL;
+}
+static inline char *pp_iface_stat(struct iface_stat *is)
+{
+ return NULL;
+}
+static inline char *pp_sock_tag(struct sock_tag *st)
+{
+ return NULL;
+}
+static inline char *pp_uid_tag_data(struct uid_tag_data *qtd)
+{
+ return NULL;
+}
+static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
+{
+ return NULL;
+}
+
+/*------------------------------------------*/
+static inline
+void prdebug_sock_tag_list(int indent_level,
+ struct list_head *sock_tag_list)
+{
+}
+static inline
+void prdebug_sock_tag_tree(int indent_level,
+ struct rb_root *sock_tag_tree)
+{
+}
+static inline
+void prdebug_proc_qtu_data_tree(int indent_level,
+ struct rb_root *proc_qtu_data_tree)
+{
+}
+static inline
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
+{
+}
+static inline
+void prdebug_uid_tag_data_tree(int indent_level,
+ struct rb_root *uid_tag_data_tree)
+{
+}
+static inline
+void prdebug_tag_stat_tree(int indent_level,
+ struct rb_root *tag_stat_tree)
+{
+}
+static inline
+void prdebug_iface_stat_list(int indent_level,
+ struct list_head *iface_stat_list)
+{
+}
+#endif
+/*------------------------------------------*/
+const char *netdev_evt_str(int netdev_event);
+#endif /* ifndef __XT_QTAGUID_PRINT_H__ */
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c
new file mode 100644
index 000000000000..4328562572f6
--- /dev/null
+++ b/net/netfilter/xt_quota2.c
@@ -0,0 +1,385 @@
+/*
+ * xt_quota2 - enhanced xt_quota that can count upwards and in packets
+ * as a minimal accounting match.
+ * by Jan Engelhardt <jengelh@medozas.de>, 2008
+ *
+ * Originally based on xt_quota.c:
+ * netfilter module to enforce network quotas
+ * Sam Johnston <samj@samj.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License; either
+ * version 2 of the License, as published by the Free Software Foundation.
+ */
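+
+/*
+ * Usage sketch (assuming an iptables userspace that ships the quota2
+ * match):
+ *
+ *   iptables -A OUTPUT -m quota2 --name wan --quota 1048576 -j ACCEPT
+ *
+ * Each named counter appears as /proc/net/xt_quota/<name>; the
+ * remaining (or, with --grow, accumulated) value can be read and
+ * rewritten there at runtime.
+ */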
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_quota2.h>
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+#include <linux/netfilter_ipv4/ipt_ULOG.h>
+#endif
+
+/**
+ * struct xt_quota_counter - quota state, possibly shared between rules
+ * @quota: remaining quota, or the running total in grow mode
+ * @lock: lock to protect quota writers from each other
+ * @list: node in counter_list (named counters only)
+ * @ref: reference count (named counters only)
+ * @name: counter name from the rule (named counters only)
+ * @procfs_entry: entry under /proc/net/xt_quota/ (named counters only)
+ */
+struct xt_quota_counter {
+ u_int64_t quota;
+ spinlock_t lock;
+ struct list_head list;
+ atomic_t ref;
+ char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)];
+ struct proc_dir_entry *procfs_entry;
+};
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+/* Harald's favorite number +1 :D From ipt_ULOG.c */
+static unsigned int qlog_nl_event = 112;
+module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(event_num,
+		 "Event number for NETLINK_NFLOG message. 0 disables log. "
+		 "111 is what ipt_ULOG uses.");
+static struct sock *nflognl;
+#endif
+
+static LIST_HEAD(counter_list);
+static DEFINE_SPINLOCK(counter_list_lock);
+
+static struct proc_dir_entry *proc_xt_quota;
+static unsigned int quota_list_perms = S_IRUGO | S_IWUSR;
+static unsigned int quota_list_uid = 0;
+static unsigned int quota_list_gid = 0;
+module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR);
+module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR);
+module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR);
+
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
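+/*
+ * Broadcast an ipt_ULOG-compatible message (ulog_packet_msg_t) to
+ * NETLINK_NFLOG group 1 when a quota transitions to exhausted, so
+ * existing ULOG listeners can pick it up.
+ */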
+static void quota2_log(unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *prefix)
+{
+ ulog_packet_msg_t *pm;
+ struct sk_buff *log_skb;
+ size_t size;
+ struct nlmsghdr *nlh;
+
+ if (!qlog_nl_event)
+ return;
+
+ size = NLMSG_SPACE(sizeof(*pm));
+ size = max(size, (size_t)NLMSG_GOODSIZE);
+ log_skb = alloc_skb(size, GFP_ATOMIC);
+ if (!log_skb) {
+ pr_err("xt_quota2: cannot alloc skb for logging\n");
+ return;
+ }
+
+ nlh = nlmsg_put(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event,
+ sizeof(*pm), 0);
+ if (!nlh) {
+ pr_err("xt_quota2: nlmsg_put failed\n");
+ kfree_skb(log_skb);
+ return;
+ }
+ pm = nlmsg_data(nlh);
+ if (skb->tstamp.tv64 == 0)
+ __net_timestamp((struct sk_buff *)skb);
+ pm->data_len = 0;
+ pm->hook = hooknum;
+ if (prefix != NULL)
+ strlcpy(pm->prefix, prefix, sizeof(pm->prefix));
+ else
+ *(pm->prefix) = '\0';
+ if (in)
+ strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name));
+ else
+ pm->indev_name[0] = '\0';
+
+ if (out)
+ strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
+ else
+ pm->outdev_name[0] = '\0';
+
+ NETLINK_CB(log_skb).dst_group = 1;
+ pr_debug("throwing 1 packets to netlink group 1\n");
+ netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC);
+}
+#else
+static void quota2_log(unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *prefix)
+{
+}
+#endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */
+
+static ssize_t quota_proc_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct xt_quota_counter *e = PDE_DATA(file_inode(file));
+ char tmp[24];
+ size_t tmp_size;
+
+ spin_lock_bh(&e->lock);
+ tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", e->quota);
+ spin_unlock_bh(&e->lock);
+ return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static ssize_t quota_proc_write(struct file *file, const char __user *input,
+ size_t size, loff_t *ppos)
+{
+ struct xt_quota_counter *e = PDE_DATA(file_inode(file));
+ char buf[sizeof("18446744073709551616")];
+
+ if (size > sizeof(buf) - 1)
+ size = sizeof(buf) - 1;
+ if (copy_from_user(buf, input, size) != 0)
+ return -EFAULT;
+ buf[size] = '\0';
+
+ spin_lock_bh(&e->lock);
+ e->quota = simple_strtoull(buf, NULL, 0);
+ spin_unlock_bh(&e->lock);
+ return size;
+}
+
+static const struct file_operations q2_counter_fops = {
+ .read = quota_proc_read,
+ .write = quota_proc_write,
+ .llseek = default_llseek,
+};
+
+static struct xt_quota_counter *
+q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon)
+{
+ struct xt_quota_counter *e;
+ unsigned int size;
+
+ /* Do not need all the procfs things for anonymous counters. */
+ size = anon ? offsetof(typeof(*e), list) : sizeof(*e);
+ e = kmalloc(size, GFP_KERNEL);
+ if (e == NULL)
+ return NULL;
+
+ e->quota = q->quota;
+ spin_lock_init(&e->lock);
+ if (!anon) {
+ INIT_LIST_HEAD(&e->list);
+ atomic_set(&e->ref, 1);
+ strlcpy(e->name, q->name, sizeof(e->name));
+ }
+ return e;
+}
+
+/**
+ * q2_get_counter - get a reference to a counter, creating it if needed
+ * @q: quota match info carrying the counter name
+ */
+static struct xt_quota_counter *
+q2_get_counter(const struct xt_quota_mtinfo2 *q)
+{
+ struct proc_dir_entry *p;
+ struct xt_quota_counter *e = NULL;
+ struct xt_quota_counter *new_e;
+
+ if (*q->name == '\0')
+ return q2_new_counter(q, true);
+
+ /* No need to hold a lock while getting a new counter */
+ new_e = q2_new_counter(q, false);
+ if (new_e == NULL)
+ goto out;
+
+ spin_lock_bh(&counter_list_lock);
+ list_for_each_entry(e, &counter_list, list)
+ if (strcmp(e->name, q->name) == 0) {
+ atomic_inc(&e->ref);
+ spin_unlock_bh(&counter_list_lock);
+ kfree(new_e);
+ pr_debug("xt_quota2: old counter name=%s", e->name);
+ return e;
+ }
+ e = new_e;
+ pr_debug("xt_quota2: new_counter name=%s", e->name);
+ list_add_tail(&e->list, &counter_list);
+ /* An entry with a refcount of 1 is not directly destructible:
+ * this function has not yet returned the new entry, so iptables
+ * holds no reference through which it could destroy it. Before
+ * another rule could destroy it, this function would first have
+ * to be re-invoked and take a new ref on the same named quota.
+ * Nobody can access e->procfs_entry yet either, so it is safe
+ * to release the lock here. */
+ spin_unlock_bh(&counter_list_lock);
+
+ /* proc_create_data() must not be called while holding a spinlock */
+ p = e->procfs_entry = proc_create_data(e->name, quota_list_perms,
+ proc_xt_quota, &q2_counter_fops, e);
+
+ if (IS_ERR_OR_NULL(p)) {
+ spin_lock_bh(&counter_list_lock);
+ list_del(&e->list);
+ spin_unlock_bh(&counter_list_lock);
+ goto out;
+ }
+ proc_set_user(p, quota_list_uid, quota_list_gid);
+ return e;
+
+ out:
+ kfree(e);
+ return NULL;
+}
+
+static int quota_mt2_check(const struct xt_mtchk_param *par)
+{
+ struct xt_quota_mtinfo2 *q = par->matchinfo;
+
+ pr_debug("xt_quota2: check() flags=0x%04x", q->flags);
+
+ if (q->flags & ~XT_QUOTA_MASK)
+ return -EINVAL;
+
+ q->name[sizeof(q->name)-1] = '\0';
+ if (*q->name == '.' || strchr(q->name, '/') != NULL) {
+ printk(KERN_ERR "xt_quota.3: illegal name\n");
+ return -EINVAL;
+ }
+
+ q->master = q2_get_counter(q);
+ if (q->master == NULL) {
+ printk(KERN_ERR "xt_quota.3: memory alloc failure\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void quota_mt2_destroy(const struct xt_mtdtor_param *par)
+{
+ struct xt_quota_mtinfo2 *q = par->matchinfo;
+ struct xt_quota_counter *e = q->master;
+
+ if (*q->name == '\0') {
+ kfree(e);
+ return;
+ }
+
+ spin_lock_bh(&counter_list_lock);
+ if (!atomic_dec_and_test(&e->ref)) {
+ spin_unlock_bh(&counter_list_lock);
+ return;
+ }
+
+ list_del(&e->list);
+ remove_proc_entry(e->name, proc_xt_quota);
+ spin_unlock_bh(&counter_list_lock);
+ kfree(e);
+}
+
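+/*
+ * Core match: with XT_QUOTA_GROW the counter counts upwards (bytes, or
+ * packets with XT_QUOTA_PACKET) and the rule always matches; otherwise
+ * the quota counts down and the rule matches while quota remains
+ * (XT_QUOTA_INVERT flips this), logging once via quota2_log() when the
+ * quota transitions to exhausted.
+ */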
+static bool
+quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct xt_quota_mtinfo2 *q = (void *)par->matchinfo;
+ struct xt_quota_counter *e = q->master;
+ bool ret = q->flags & XT_QUOTA_INVERT;
+
+ spin_lock_bh(&e->lock);
+ if (q->flags & XT_QUOTA_GROW) {
+ /*
+ * While no_change is pointless in "grow" mode, we will
+ * implement it here simply to have a consistent behavior.
+ */
+ if (!(q->flags & XT_QUOTA_NO_CHANGE)) {
+ e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+ }
+ ret = true;
+ } else {
+ if (e->quota >= skb->len) {
+ if (!(q->flags & XT_QUOTA_NO_CHANGE))
+ e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+ ret = !ret;
+ } else {
+ /* We are transitioning, log that fact. */
+ if (e->quota) {
+ quota2_log(par->hooknum,
+ skb,
+ par->in,
+ par->out,
+ q->name);
+ }
+ /* we do not allow even small packets from now on */
+ e->quota = 0;
+ }
+ }
+ spin_unlock_bh(&e->lock);
+ return ret;
+}
+
+static struct xt_match quota_mt2_reg[] __read_mostly = {
+ {
+ .name = "quota2",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .checkentry = quota_mt2_check,
+ .match = quota_mt2,
+ .destroy = quota_mt2_destroy,
+ .matchsize = sizeof(struct xt_quota_mtinfo2),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "quota2",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .checkentry = quota_mt2_check,
+ .match = quota_mt2,
+ .destroy = quota_mt2_destroy,
+ .matchsize = sizeof(struct xt_quota_mtinfo2),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init quota_mt2_init(void)
+{
+ int ret;
+ pr_debug("xt_quota2: init()");
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+ nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, NULL);
+ if (!nflognl)
+ return -ENOMEM;
+#endif
+
+ proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net);
+ if (proc_xt_quota == NULL) {
+ ret = -EACCES;
+ goto out;
+ }
+ ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+ if (ret < 0)
+ remove_proc_entry("xt_quota", init_net.proc_net);
+out:
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+ /* do not leak the netlink socket when init fails */
+ if (ret < 0)
+ netlink_kernel_release(nflognl);
+#endif
+ pr_debug("xt_quota2: init() %d", ret);
+ return ret;
+}
+
+static void __exit quota_mt2_exit(void)
+{
+ xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+ remove_proc_entry("xt_quota", init_net.proc_net);
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+ netlink_kernel_release(nflognl);
+#endif
+}
+
+module_init(quota_mt2_init);
+module_exit(quota_mt2_exit);
+MODULE_DESCRIPTION("Xtables: countdown quota match; up counter");
+MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_quota2");
+MODULE_ALIAS("ip6t_quota2");
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 63b2bdb59e95..030f33cd9ee9 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -35,7 +35,7 @@
#include <net/netfilter/nf_conntrack.h>
#endif
-static void
+void
xt_socket_put_sk(struct sock *sk)
{
if (sk->sk_state == TCP_TIME_WAIT)
@@ -43,6 +43,7 @@ xt_socket_put_sk(struct sock *sk)
else
sock_put(sk);
}
+EXPORT_SYMBOL(xt_socket_put_sk);
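+/* The socket lookup/put helpers are exported for reuse by other
+ * netfilter modules (in this tree, presumably xt_qtaguid). */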
static int
extract_icmp4_fields(const struct sk_buff *skb,
@@ -101,9 +102,8 @@ extract_icmp4_fields(const struct sk_buff *skb,
return 0;
}
-static bool
-socket_match(const struct sk_buff *skb, struct xt_action_param *par,
- const struct xt_socket_mtinfo1 *info)
+struct sock *
+xt_socket_get4_sk(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct iphdr *iph = ip_hdr(skb);
struct udphdr _hdr, *hp = NULL;
@@ -120,7 +120,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
hp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
protocol = iph->protocol;
saddr = iph->saddr;
@@ -131,9 +131,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
&sport, &dport))
- return false;
+ return NULL;
} else {
- return false;
+ return NULL;
}
#ifdef XT_SOCKET_HAVE_CONNTRACK
@@ -157,6 +157,23 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
+
+ pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
+ protocol, &saddr, ntohs(sport),
+ &daddr, ntohs(dport),
+ &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
+
+ return sk;
+}
+EXPORT_SYMBOL(xt_socket_get4_sk);
+
+static bool
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
+ const struct xt_socket_mtinfo1 *info)
+{
+ struct sock *sk;
+
+ sk = xt_socket_get4_sk(skb, par);
if (sk != NULL) {
bool wildcard;
bool transparent = true;
@@ -179,11 +196,6 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
sk = NULL;
}
- pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
- protocol, &saddr, ntohs(sport),
- &daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
return (sk != NULL);
}
@@ -255,8 +267,8 @@ extract_icmp6_fields(const struct sk_buff *skb,
return 0;
}
-static bool
-socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
+struct sock *
+xt_socket_get6_sk(const struct sk_buff *skb, struct xt_action_param *par)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
struct udphdr _hdr, *hp = NULL;
@@ -264,7 +276,6 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
struct in6_addr *daddr = NULL, *saddr = NULL;
__be16 uninitialized_var(dport), uninitialized_var(sport);
int thoff = 0, uninitialized_var(tproto);
- const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
if (tproto < 0) {
@@ -276,7 +287,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
hp = skb_header_pointer(skb, thoff,
sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
saddr = &iph->saddr;
sport = hp->source;
@@ -286,13 +297,30 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
} else if (tproto == IPPROTO_ICMPV6) {
if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
&sport, &dport))
- return false;
+ return NULL;
} else {
- return false;
+ return NULL;
}
sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
+ pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
+ "(orig %pI6:%hu) sock %p\n",
+ tproto, saddr, ntohs(sport),
+ daddr, ntohs(dport),
+ &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
+ return sk;
+}
+EXPORT_SYMBOL(xt_socket_get6_sk);
+
+static bool
+socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct sock *sk;
+ const struct xt_socket_mtinfo1 *info;
+
+ info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ sk = xt_socket_get6_sk(skb, par);
if (sk != NULL) {
bool wildcard;
bool transparent = true;
@@ -315,12 +343,6 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
sk = NULL;
}
- pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
- "(orig %pI6:%hu) sock %p\n",
- tproto, saddr, ntohs(sport),
- daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
return (sk != NULL);
}
#endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c9c2a8441d32..b55ab2d9a94b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2760,7 +2760,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
struct sock *s = v;
struct netlink_sock *nlk = nlk_sk(s);
- seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
+ seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
s,
s->sk_protocol,
nlk->portid,
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 6ec7d55b1769..dba14b02dd09 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -14,3 +14,12 @@ config PHONET
To compile this driver as a module, choose M here: the module
will be called phonet. If unsure, say N.
+
+if PHONET
+
+config PHONET_DEBUG
+ bool "Debug support for PHONET drivers"
+ depends on DEBUG_KERNEL
+ help
+ Say Y to enable Phonet debug messaging.
+
+endif
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index e10b1b182ce3..f219b2dd4755 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -6,6 +6,7 @@ phonet-y := \
socket.o \
datagram.o \
sysctl.o \
- af_phonet.o
+ af_phonet.o \
+ ld_phonet.o
pn_pep-y := pep.o pep-gprs.o
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 5a940dbd74a3..da25b8a99317 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -34,6 +34,53 @@
#include <net/phonet/phonet.h>
#include <net/phonet/pn_dev.h>
+#ifdef ACTIVATE_PHONET_DEBUG
+
+enum phonet_debug_state phonet_dbg_state = OFF;
+
+static ssize_t phonet_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ printk(KERN_DEBUG "phonet show:\n");
+
+ switch (phonet_dbg_state) {
+ case ON:
+ return sprintf(buf, "on\n");
+ case OFF:
+ return sprintf(buf, "off\n");
+ case DATA:
+ return sprintf(buf, "data\n");
+ default:
+ return -ENODEV;
+ }
+}
+
+static ssize_t phonet_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ if (sysfs_streq(buf, "on")) {
+ phonet_dbg_state = ON;
+ printk(KERN_DEBUG "Phonet traces activated\nBe Careful do not trace Dmesg in MTDn");
+ } else if (sysfs_streq(buf, "off")) {
+ phonet_dbg_state = OFF;
+ } else if (sysfs_streq(buf, "off")) {
+ phonet_dbg_state = OFF;
+ } else if (sysfs_streq(buf, "data")) {
+ phonet_dbg_state = DATA;
+ } else {
+ printk(KERN_DEBUG "please use on/off/data\n");
+ }
+ return -EINVAL;
+}
+
+static struct kobj_attribute phonet_attr =
+ __ATTR(phonet_dbg, 0644, phonet_show, phonet_store);
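+/*
+ * phonet_init() attaches this attribute to kernel_kobj, so the trace
+ * level is controlled via /sys/kernel/phonet_dbg, e.g.
+ * "echo data > /sys/kernel/phonet_dbg".
+ */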
+#endif
+
+
/* Transport protocol registration */
static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
@@ -68,8 +115,10 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
struct phonet_protocol *pnp;
int err;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+/* PATCH: privilege check intentionally disabled so that unprivileged
+ * processes may create Phonet sockets:
+ * if (!capable(CAP_SYS_ADMIN))
+ * return -EPERM;
+ */
if (protocol == 0) {
/* Default protocol selection */
@@ -112,6 +161,8 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
pn->sobject = 0;
pn->dobject = 0;
pn->resource = 0;
+/*!*/ pn->resource_type = 0;
+/*!*/ pn->resource_subtype = 0;
sk->sk_prot->init(sk);
err = 0;
@@ -162,7 +213,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
u16 dst, u16 src, u8 res, u8 irq)
{
struct phonethdr *ph;
- int err;
+ int err, i;
if (skb->len + 2 > 0xffff /* Phonet length field limit */ ||
skb->len + sizeof(struct phonethdr) > dev->mtu) {
@@ -192,6 +243,16 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
skb->priority = 0;
skb->dev = dev;
+ PN_PRINTK("pn_send rdev %x sdev %x res %x robj %x sobj %x netdev=%s\n",
+ ph->pn_rdev, ph->pn_sdev, ph->pn_res,
+ ph->pn_robj, ph->pn_sobj, dev->name);
+ PN_DATA_PRINTK("PHONET : skb data = %d\nPHONET :", skb->len);
+ for (i = 1; i <= skb->len; i++) {
+ PN_DATA_PRINTK(" %02x", skb->data[i-1]);
+ if ((i%8) == 0)
+ PN_DATA_PRINTK("\n");
+ }
+
if (skb->pkt_type == PACKET_LOOPBACK) {
skb_reset_mac_header(skb);
skb_orphan(skb);
@@ -210,6 +271,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
return err;
drop:
+ printk(KERN_DEBUG "pn_send DROP\n");
kfree_skb(skb);
return err;
}
@@ -287,6 +349,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
return err;
drop:
+ printk(KERN_DEBUG "pn_skb_send DROP\n");
kfree_skb(skb);
if (dev)
dev_put(dev);
@@ -376,6 +439,7 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
struct phonethdr *ph;
struct sockaddr_pn sa;
u16 len;
+ int i;
/* check we have at least a full Phonet header */
if (!pskb_pull(skb, sizeof(struct phonethdr)))
@@ -393,6 +457,22 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
pn_skb_get_dst_sockaddr(skb, &sa);
+ PN_PRINTK("PN rcv: hdr rdev %x sdev %x res %x robj %x sobj %x dev=%s\n",
+ ph->pn_rdev, ph->pn_sdev, ph->pn_res,
+ ph->pn_robj, ph->pn_sobj, dev->name);
+ PN_DATA_PRINTK("PHONET : skb data = %d\nPHONET :", skb->len);
+ for (i = 1; i <= skb->len; i++) {
+ PN_DATA_PRINTK(" %02x", skb->data[i-1]);
+ if ((i%8) == 0)
+ PN_DATA_PRINTK("\n");
+ }
+
+ /* check if this is multicasted */
+ if (pn_sockaddr_get_object(&sa) == PNOBJECT_MULTICAST) {
+ pn_deliver_sock_broadcast(net, skb);
+ goto out;
+ }
+
/* check if this is broadcasted */
if (pn_sockaddr_get_addr(&sa) == PNADDR_BROADCAST) {
pn_deliver_sock_broadcast(net, skb);
@@ -402,14 +482,17 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
/* resource routing */
if (pn_sockaddr_get_object(&sa) == 0) {
struct sock *sk = pn_find_sock_by_res(net, sa.spn_resource);
- if (sk)
+ if (sk) {
+ printk(KERN_DEBUG "phonet new resource routing!\n");
return sk_receive_skb(sk, skb, 0);
+ }
}
/* check if we are the destination */
if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) {
/* Phonet packet input */
- struct sock *sk = pn_find_sock_by_sa(net, &sa);
+/*!*/ struct sock *sk = pn_find_sock_by_sa_and_skb(net, &sa, skb);
+ /*struct sock *sk = pn_find_sock_by_sa(net, &sa);*/
if (sk)
return sk_receive_skb(sk, skb, 0);
@@ -454,6 +537,7 @@ out_dev:
out:
kfree_skb(skb);
+ printk(KERN_DEBUG "phonet_rcv Drop message!\n");
return NET_RX_DROP;
}
@@ -503,6 +587,12 @@ static int __init phonet_init(void)
{
int err;
+#ifdef ACTIVATE_PHONET_DEBUG
+ err = sysfs_create_file(kernel_kobj, &phonet_attr.attr);
+ if (err)
+ printk(KERN_DEBUG "phonet sysfs_create_file failed: %d\n", err);
+#endif
+
err = phonet_device_init();
if (err)
return err;
diff --git a/net/phonet/ld_phonet.c b/net/phonet/ld_phonet.c
new file mode 100644
index 000000000000..2a8d4aad51f6
--- /dev/null
+++ b/net/phonet/ld_phonet.c
@@ -0,0 +1,644 @@
+/*
+ * Phonet device TTY line discipline
+ *
+ * Copyright (c) 1999-2002 RMC
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/serio.h>
+#include <linux/tty.h>
+
+#include <asm/unaligned.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+
+#include <linux/if_arp.h>
+#include <linux/if_phonet.h>
+#include <linux/phonet.h>
+#include <net/phonet/phonet.h>
+#include <net/phonet/pn_dev.h>
+
+MODULE_AUTHOR("david RMC");
+MODULE_DESCRIPTION("Phonet TTY line discipline");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_LDISC(N_PHONET);
+
+#define SEND_QUEUE_LOW 10
+#define SEND_QUEUE_HIGH 100
+#define PHONET_SENDING 1 /* Bit 1 = 0x02*/
+#define PHONET_FLOW_OFF_SENT 4 /* Bit 4 = 0x10 */
+#define MAX_WRITE_CHUNK 8192
+#define ISI_MSG_HEADER_SIZE 6
+#define MAX_BUFF_SIZE 20000
+
+#define LD_PHONET_NEW_ISI_MSG 0
+#define LD_PHONET_ISI_MSG_LEN 1
+#define LD_PHONET_ISI_MSG_NO_LEN 2
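+/*
+ * Receive-side ISI reassembly states: a new message starts in
+ * LD_PHONET_NEW_ISI_MSG; if a TTY buffer ends mid-message we move to
+ * LD_PHONET_ISI_MSG_LEN (length known, payload incomplete) or to
+ * LD_PHONET_ISI_MSG_NO_LEN (fewer than ISI_MSG_HEADER_SIZE bytes seen,
+ * so the length field itself is still incomplete).
+ */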
+
+#define PN_MEDIA_USB 0x1B
+
+struct ld_phonet {
+ struct tty_struct *tty;
+ wait_queue_head_t wait;
+ spinlock_t lock;
+ unsigned long flags;
+ struct sk_buff *skb;
+ unsigned long len;
+ unsigned long lentorcv;
+ unsigned long datarcv;
+ unsigned long state;
+ struct net_device *dev;
+ struct list_head node;
+ struct sk_buff_head head;
+ char *tty_name;
+ int ld_phonet_state;
+ int n_Data_Processed;
+ int n_Data_Sent;
+ int n_Remaining_Data;
+ bool link_up;
+ int nb_try_to_tx;
+};
+
+
+static int ld_pn_net_open(struct net_device *dev)
+{
+ netif_wake_queue(dev);
+ return 0;
+}
+
+static int ld_pn_net_close(struct net_device *dev)
+{
+ netif_stop_queue(dev);
+ return 0;
+}
+
+static int ld_pn_handle_tx(struct ld_phonet *ld_pn)
+{
+ struct tty_struct *tty = ld_pn->tty;
+ struct sk_buff *skb;
+ int tty_wr, len, room, i;
+ PN_PRINTK("Write Data in tty\n");
+ if (tty == NULL)
+ return 0;
+ /* Enter critical section */
+ if (test_and_set_bit(PHONET_SENDING, &ld_pn->state))
+ return 0;
+
+ /* skb_peek is safe because handle_tx is called after skb_queue_tail */
+ while ((skb = skb_peek(&ld_pn->head)) != NULL) {
+
+ /* Make sure you don't write too much */
+ len = skb->len;
+ room = tty_write_room(tty);
+
+ if (!room) {
+ if (ld_pn->nb_try_to_tx++ > 40) {
+ ld_pn->link_up = false;
+ /* Flush TX queue */
+ while ((skb = \
+ skb_dequeue(&ld_pn->head)) != NULL) {
+ skb->dev->stats.tx_dropped++;
+ if (in_interrupt())
+ dev_kfree_skb_irq(skb);
+ else
+ kfree_skb(skb);
+ }
+ }
+ break;
+ }
+
+ /* Get room => reset nb_try_to_tx counter */
+ ld_pn->nb_try_to_tx = 0;
+
+ if (room > MAX_WRITE_CHUNK)
+ room = MAX_WRITE_CHUNK;
+ if (len > room)
+ len = room;
+
+
+ tty_wr = tty->ops->write(tty, skb->data, len);
+ ld_pn->dev->stats.tx_packets++;
+ ld_pn->dev->stats.tx_bytes += tty_wr;
+ PN_DATA_PRINTK("PHONET: write data in tty\n");
+ for (i = 1; i <= len; i++) {
+ PN_DATA_PRINTK(" %02x", skb->data[i-1]);
+ if ((i%8) == 0)
+ PN_DATA_PRINTK("\n");
+ }
+ PN_DATA_PRINTK("\n");
+ /* Error on TTY ?! */
+ if (tty_wr < 0)
+ goto error;
+ /* Reduce buffer written, and discard if empty */
+ skb_pull(skb, tty_wr);
+ if (skb->len == 0) {
+ struct sk_buff *tmp = skb_dequeue(&ld_pn->head);
+ BUG_ON(tmp != skb);
+ if (in_interrupt())
+ dev_kfree_skb_irq(skb);
+ else
+ kfree_skb(skb);
+ }
+ }
+ /* Done sending for now; leave the critical section */
+ clear_bit(PHONET_SENDING, &ld_pn->state);
+
+ return 0;
+error:
+ clear_bit(PHONET_SENDING, &ld_pn->state);
+ return tty_wr;
+}
+
+
+
+static int ld_pn_net_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ld_phonet *ld_pn;
+ u8 *ptr;
+
+ BUG_ON(dev == NULL);
+ ld_pn = netdev_priv(dev);
+ /* Prepend the 6-byte framing pattern expected on the wire before
+ * each ISI message; bytes 4-5 mirror the message's big-endian
+ * Phonet length field (found at data[10..11] after the push). */
+ ptr = skb_push(skb, 6);
+ ptr[0] = 0xdd;
+ ptr[1] = 0x7f;
+ ptr[2] = 0x21;
+ ptr[3] = 0x9a;
+ ptr[4] = skb->data[10];
+ ptr[5] = skb->data[11];
+
+ if (ld_pn->link_up == true) {
+ skb_queue_tail(&ld_pn->head, skb);
+ return ld_pn_handle_tx(ld_pn);
+ } else {
+ if (tty_write_room(ld_pn->tty)) {
+ /* link is up again */
+ ld_pn->link_up = true;
+ ld_pn->nb_try_to_tx = 0;
+
+ skb_queue_tail(&ld_pn->head, skb);
+ return ld_pn_handle_tx(ld_pn);
+ } else {
+ if (in_interrupt())
+ dev_kfree_skb_irq(skb);
+ else
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
+ }
+}
+
+static int
+ld_pn_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ int ret = 0;
+ switch (cmd) {
+ case SIOCPNGAUTOCONF:
+ ret = phonet_address_add(dev, PN_MEDIA_USB);
+ if (ret)
+ return ret;
+ phonet_address_notify(RTM_NEWADDR, dev, PN_MEDIA_USB);
+ phonet_route_add(dev, PN_DEV_PC);
+ dev_open(dev);
+ netif_carrier_on(dev);
+ /* Return NOIOCTLCMD so Phonet won't do it again */
+ return -ENOIOCTLCMD;
+ }
+ return -ENOIOCTLCMD;
+}
+
+static int ld_pn_net_mtu(struct net_device *dev, int new_mtu)
+{
+ if ((new_mtu < PHONET_MIN_MTU) || (new_mtu > PHONET_MAX_MTU))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static const struct net_device_ops ld_pn_netdev_ops = {
+ .ndo_open = ld_pn_net_open,
+ .ndo_stop = ld_pn_net_close,
+ .ndo_start_xmit = ld_pn_net_xmit,
+ .ndo_do_ioctl = ld_pn_net_ioctl,
+ .ndo_change_mtu = ld_pn_net_mtu,
+};
+
+static void ld_pn_net_setup(struct net_device *dev)
+{
+ dev->features = 0;
+ dev->type = ARPHRD_PHONET;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+ dev->mtu = PHONET_DEV_MTU;
+ dev->hard_header_len = 1;
+ dev->dev_addr[0] = PN_MEDIA_USB;
+ dev->addr_len = 1;
+ dev->tx_queue_len = 5;
+
+ dev->netdev_ops = &ld_pn_netdev_ops;
+ dev->destructor = free_netdev;
+ dev->header_ops = &phonet_header_ops;
+}
+
+
+/*****************************************
+*** TTY
+******************************************/
+static int ld_phonet_ldisc_open(struct tty_struct *tty)
+{
+
+ struct ld_phonet *ld_pn;
+ struct net_device *dev;
+ int err = 0;
+ /* Create net device */
+ dev = alloc_netdev(sizeof(*ld_pn), "upnlink%d", ld_pn_net_setup);
+ if (!dev)
+ return -ENOMEM;
+
+ ld_pn = netdev_priv(dev);
+ spin_lock_init(&ld_pn->lock);
+ netif_carrier_off(dev);
+ skb_queue_head_init(&ld_pn->head);
+ ld_pn->tty = tty;
+ tty->disc_data = ld_pn;
+ tty->receive_room = 65536;
+ ld_pn->dev = dev;
+ ld_pn->skb = NULL;
+ ld_pn->len = 0;
+ ld_pn->lentorcv = 0;
+ ld_pn->datarcv = 0;
+ ld_pn->ld_phonet_state = LD_PHONET_NEW_ISI_MSG;
+ ld_pn->n_Data_Processed = 0;
+ ld_pn->n_Data_Sent = 0;
+ ld_pn->n_Remaining_Data = 0;
+ ld_pn->link_up = true;
+ ld_pn->nb_try_to_tx = 0;
+
+ err = register_netdev(dev);
+ if (err)
+ free_netdev(dev);
+
+ return err;
+}
+
+static void ld_phonet_ldisc_close(struct tty_struct *tty)
+{
+ struct ld_phonet *ld_pn = tty->disc_data;
+
+ tty->disc_data = NULL;
+ ld_pn->tty = NULL;
+ unregister_netdev(ld_pn->dev);
+ /* free_netdev(ld_pn->dev); FIXME: to be checked (David) */
+}
+
+static void ld_phonet_ldisc_initiate_transfer(struct ld_phonet *ld_pn,
+ const unsigned char *cp, int count)
+{
+
+ struct sk_buff *skb = NULL;
+ unsigned int msglen = 0;
+
+ struct phonethdr *ph = NULL;
+
+ /* Check if there is still data in cp */
+ while (ld_pn->n_Data_Processed < count) {
+ /* Check if extract length is possible */
+ if (count > ISI_MSG_HEADER_SIZE) {
+ /* Extract length */
+ /* Offset by one byte: the leading media field of the
+ * frame is not part of struct phonethdr */
+ ph = (struct phonethdr *) \
+ (cp + ld_pn->n_Data_Processed + sizeof(char));
+ msglen = get_unaligned_be16(&ph->pn_length);
+ ld_pn->len = msglen + ISI_MSG_HEADER_SIZE;
+
+ /* Alloc SKBuff */
+ skb = netdev_alloc_skb(ld_pn->dev, ld_pn->len);
+ if (NULL == skb) {
+ /* TBD handle error */
+ return;
+ }
+
+ skb->dev = ld_pn->dev;
+ skb->protocol = htons(ETH_P_PHONET);
+ skb_reset_mac_header(skb);
+ ld_pn->skb = skb;
+
+ /* check if we receive complete
+ data in this usb frame */
+ if (ld_pn->len <= count) {
+ /* We receive complete data
+ in this usb frame */
+ /* copy the ISI buffer */
+ memcpy(skb_put(skb, ld_pn->len), \
+ cp + ld_pn->n_Data_Processed, ld_pn->len);
+ ld_pn->n_Data_Processed += ld_pn->len;
+
+ /* Send to Phonet */
+ ld_pn->dev->stats.rx_packets++;
+ ld_pn->dev->stats.rx_bytes += skb->len;
+ __skb_pull(skb, 1);
+ /* strip the media id byte before handing
+ * the frame to the Phonet stack */
+
+ netif_rx(skb);
+ ld_pn->n_Data_Sent += ld_pn->len;
+
+ /* TBD : Reset pointers */
+ } else {
+ /* We receive only partial ISI message */
+ /* Copy the partial ISI message */
+ memcpy(skb_put(skb, count - \
+ ld_pn->n_Data_Sent), cp + \
+ ld_pn->n_Data_Processed, count - \
+ ld_pn->n_Data_Sent);
+ ld_pn->ld_phonet_state = LD_PHONET_ISI_MSG_LEN;
+ ld_pn->n_Remaining_Data = ld_pn->len - \
+ (count - ld_pn->n_Data_Sent);
+ ld_pn->n_Data_Processed += count - \
+ ld_pn->n_Data_Sent;
+
+ return;
+ }
+ } else {
+ /* Not able to extract length since received
+ usb frame length is
+ less than ISI message header size */
+
+ /* Alloc SKBuff with max size */
+ skb = netdev_alloc_skb(ld_pn->dev, MAX_BUFF_SIZE);
+ if (NULL == skb) {
+ /* TBD handle error */
+ return;
+ }
+
+ skb->dev = ld_pn->dev;
+ skb->protocol = htons(ETH_P_PHONET);
+ skb_reset_mac_header(skb);
+ ld_pn->skb = skb;
+
+ /* Copy available data */
+ memcpy(skb_put(skb, count - ld_pn->n_Data_Sent), \
+ cp + ld_pn->n_Data_Processed, count - \
+ ld_pn->n_Data_Sent);
+ ld_pn->ld_phonet_state = LD_PHONET_ISI_MSG_NO_LEN;
+ ld_pn->n_Data_Processed += count - ld_pn->n_Data_Sent;
+ ld_pn->len += count - ld_pn->n_Data_Sent;
+
+ return;
+ }
+ }
+ /* No more data in cp */
+ ld_pn->ld_phonet_state = LD_PHONET_NEW_ISI_MSG;
+ ld_pn->len = 0;
+ ld_pn->n_Data_Processed = 0;
+ ld_pn->n_Data_Sent = 0;
+ ld_pn->n_Remaining_Data = 0;
+
+ return;
+}
+
+
+static void ld_phonet_ldisc_receive(struct tty_struct *tty,
+ const unsigned char *cp, char *fp, int count)
+{
+ struct ld_phonet *ld_pn = tty->disc_data;
+ struct sk_buff *skb = ld_pn->skb;
+ unsigned long flags = 0;
+ unsigned int msglen = 0;
+ unsigned int i = 0;
+
+ struct phonethdr *ph = NULL;
+
+ PN_DATA_PRINTK("PHONET : Receive Data From tty = %d\nPHONET :", count);
+ for (i = 1; i <= count; i++) {
+ PN_DATA_PRINTK(" %02x", cp[i-1]);
+ if ((i%8) == 0)
+ PN_DATA_PRINTK("\n");
+ }
+
+ if (ld_pn->link_up == false) {
+ /* data received from PC => can TX */
+ ld_pn->link_up = true;
+
+ ld_pn->nb_try_to_tx = 0;
+ }
+
+ spin_lock_irqsave(&ld_pn->lock, flags);
+
+ /* The processed-data count must be reset for each new USB frame */
+ ld_pn->n_Data_Processed = 0;
+
+ switch (ld_pn->ld_phonet_state) {
+ case LD_PHONET_NEW_ISI_MSG:
+ PN_PRINTK("ld_phonet : new_isi_msg\n");
+ ld_phonet_ldisc_initiate_transfer(ld_pn, cp, count);
+ break;
+
+ case LD_PHONET_ISI_MSG_LEN:
+ /* check if Remaining Data is complete */
+ PN_PRINTK("ld_phonet : isi_msg_len\n");
+ if (ld_pn->n_Remaining_Data > count) {
+ /* We dont receive complete data */
+ /* Copy the available data */
+ memcpy(skb_put(skb, count), cp + \
+ ld_pn->n_Data_Processed, count);
+ ld_pn->n_Data_Processed += count;
+ ld_pn->ld_phonet_state = LD_PHONET_ISI_MSG_LEN;
+ ld_pn->n_Remaining_Data -= count;
+ } else {
+ /* We have complete data available */
+ /* Copy remaining data */
+ memcpy(skb_put(skb, ld_pn->n_Remaining_Data), \
+ cp + ld_pn->n_Data_Processed, ld_pn->n_Remaining_Data);
+ /* Send to Phonet */
+ ld_pn->dev->stats.rx_packets++;
+ ld_pn->dev->stats.rx_bytes += skb->len;
+ __skb_pull(skb, sizeof(char));
+ netif_rx(skb);
+ ld_pn->n_Data_Sent += ld_pn->len;
+
+ /* TBD : Update pointers */
+ ld_pn->n_Data_Sent += ld_pn->n_Remaining_Data;
+ ld_pn->n_Data_Processed += ld_pn->n_Remaining_Data;
+
+ /* Initiate a new ISI transfer */
+ ld_phonet_ldisc_initiate_transfer(ld_pn, cp, count);
+ }
+ break;
+
+ case LD_PHONET_ISI_MSG_NO_LEN:
+ /* Check if we can extract the length */
+ PN_PRINTK("ld_phonet : isi_msg_no_len\n");
+ if ((ld_pn->len + count) >= ISI_MSG_HEADER_SIZE) {
+
+ /* Copy remaining header to SKBuff to extract length */
+ memcpy(skb_put(skb, ISI_MSG_HEADER_SIZE - ld_pn->len),\
+ cp + ld_pn->n_Data_Processed, ISI_MSG_HEADER_SIZE - \
+ ld_pn->len);
+ ph = (struct phonethdr *) (skb->data + sizeof(char));
+ msglen = get_unaligned_be16(&ph->pn_length);
+
+ ld_pn->n_Data_Processed += \
+ ISI_MSG_HEADER_SIZE - ld_pn->len;
+
+ /* Check if we receive complete data */
+ if ((count + ld_pn->len) < \
+ (msglen + ISI_MSG_HEADER_SIZE)) {
+ /* We have not received complete data */
+ /* Copy available data */
+ memcpy(skb_put(skb, count - \
+ (ISI_MSG_HEADER_SIZE - ld_pn->len)), \
+ cp + ld_pn->n_Data_Processed, count - \
+ (ISI_MSG_HEADER_SIZE - ld_pn->len));
+ ld_pn->ld_phonet_state = LD_PHONET_ISI_MSG_LEN;
+ ld_pn->n_Remaining_Data = (msglen + \
+ ISI_MSG_HEADER_SIZE) - (count + ld_pn->len);
+ ld_pn->n_Data_Processed += count - \
+ (ISI_MSG_HEADER_SIZE - ld_pn->len);
+
+ /* Reset pointers */
+ ld_pn->len = msglen + ISI_MSG_HEADER_SIZE;
+
+ /*return;*/
+ break;
+ } else {
+ /* We receive complete data */
+ /* Copy remaining data */
+ /*memcpy(skb_put(skb, msglen), cp + \
+ ld_pn->n_Data_Processed \
+ + (ISI_MSG_HEADER_SIZE - ld_pn->len), \
+ (msglen + ISI_MSG_HEADER_SIZE) - ld_pn->len);*/
+ memcpy( \
+ skb_put(skb, (msglen + ISI_MSG_HEADER_SIZE) - \
+ (ld_pn->len + ld_pn->n_Data_Processed)), \
+ cp + ld_pn->n_Data_Processed, \
+ (msglen + ISI_MSG_HEADER_SIZE) - \
+ (ld_pn->len + ld_pn->n_Data_Processed));
+
+ /* Send to Phonet */
+ ld_pn->dev->stats.rx_packets++;
+ ld_pn->dev->stats.rx_bytes += skb->len;
+ __skb_pull(skb, sizeof(char));
+ netif_rx(skb);
+
+ /* Update pointers */
+ /*ld_pn->n_Data_Sent += \
+ (msglen + ISI_MSG_HEADER_SIZE) - ld_pn->len;
+ ld_pn->n_Data_Processed += \
+ (msglen + ISI_MSG_HEADER_SIZE) - ld_pn->len;*/
+
+ ld_pn->n_Data_Sent += \
+ (msglen + ISI_MSG_HEADER_SIZE) - \
+ (ld_pn->len + ld_pn->n_Data_Processed);
+
+ ld_pn->n_Data_Processed += \
+ (msglen + ISI_MSG_HEADER_SIZE) - \
+ (ld_pn->len + ld_pn->n_Data_Processed);
+
+ /* Check if we still have data in cp */
+ /*if (count > ld_pn->n_Data_Sent) {*/
+ if (count > ld_pn->n_Data_Processed) {
+ /* We still have data in cp */
+ /* Initiate a new ISI transfer */
+ ld_phonet_ldisc_initiate_transfer\
+ (ld_pn, cp, count);
+ } else {
+ /* No more data in cp */
+ ld_pn->ld_phonet_state = \
+ LD_PHONET_NEW_ISI_MSG;
+
+ /* Reset pointers */
+ ld_pn->len = 0;
+ ld_pn->n_Data_Processed = 0;
+ ld_pn->n_Data_Sent = 0;
+ ld_pn->n_Remaining_Data = 0;
+ }
+ }
+ } else {
+ /* Cannot extract length */
+ /* Copy available data */
+ memcpy(skb_put(skb, count), cp + \
+ ld_pn->n_Data_Processed, count);
+ ld_pn->len += count;
+ ld_pn->ld_phonet_state = \
+ LD_PHONET_ISI_MSG_NO_LEN;
+ ld_pn->n_Data_Processed += count;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ spin_unlock_irqrestore(&ld_pn->lock, flags);
+
+}
+
+static void ld_phonet_ldisc_write_wakeup(struct tty_struct *tty)
+{
+
+ struct ld_phonet *ld_pn;
+ ld_pn = tty->disc_data;
+ BUG_ON(ld_pn == NULL);
+ BUG_ON(ld_pn->tty != tty);
+ ld_pn_handle_tx(ld_pn);
+}
+
+static struct tty_ldisc_ops ld_phonet_ldisc = {
+ .owner = THIS_MODULE,
+ .name = "phonet",
+ .open = ld_phonet_ldisc_open,
+ .close = ld_phonet_ldisc_close,
+ .receive_buf = ld_phonet_ldisc_receive,
+ .write_wakeup = ld_phonet_ldisc_write_wakeup
+};
+
+/*
+ * The functions for inserting/removing us as a module.
+ */
+
+static int __init ld_phonet_init(void)
+{
+ int retval;
+ retval = tty_register_ldisc(N_PHONET, &ld_phonet_ldisc);
+
+ return retval;
+}
+
+static void __exit ld_phonet_exit(void)
+{
+ tty_unregister_ldisc(N_PHONET);
+}
+
+module_init(ld_phonet_init);
+module_exit(ld_phonet_exit);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 45a7df6575de..3ae6f3d4db8c 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -298,7 +298,7 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what,
switch (what) {
case NETDEV_REGISTER:
- if (dev->type == ARPHRD_PHONET)
+ if (dev->type == ARPHRD_PHONET || dev->type == ARPHRD_MHI)
phonet_device_autoconf(dev);
break;
case NETDEV_UNREGISTER:
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1afd1381cdc7..78998a25a2d0 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -74,6 +74,93 @@ static struct hlist_head *pn_hash_list(u16 obj)
* Find address based on socket address, match only certain fields.
* Also grab sock if it was found. Remember to sock_put it later.
*/
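+/*
+ * Variant of pn_find_sock_by_sa() that also matches on the ISI resource
+ * type/subtype carried in bytes 2 and 3 of the message payload (read
+ * from the first page fragment when the skb is non-linear), so that
+ * sockets sharing a PN_PREFIX resource can be told apart.
+ */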
+struct sock *pn_find_sock_by_sa_and_skb(struct net *net,
+ const struct sockaddr_pn *spn, struct sk_buff *skb)
+{
+ struct sock *sknode;
+ struct sock *rval = NULL;
+ u16 obj = pn_sockaddr_get_object(spn);
+ u8 res = spn->spn_resource;
+ struct hlist_head *hlist = pnsocks.hlist;
+ unsigned h;
+ u8 type;
+ u8 subtype;
+
+ rcu_read_lock();
+
+ for (h = 0; h < PN_HASHSIZE; h++) {
+ sk_for_each_rcu(sknode, hlist) {
+ struct pn_sock *pn = pn_sk(sknode);
+ BUG_ON(!pn->sobject); /* unbound socket */
+ if (!net_eq(sock_net(sknode), net))
+ continue;
+
+ if ((PN_PREFIX == pn->resource) && (PN_PREFIX == res)) {
+ if (skb_shinfo(skb)->nr_frags) {
+ struct page *msg_page;
+ u8 *msg;
+ skb_frag_t *msg_frag = &skb_shinfo(skb)
+ ->frags[0];
+
+ msg_page = skb_frag_page(msg_frag);
+ msg = page_address(msg_page);
+
+ type = msg[msg_frag->page_offset+2];
+ subtype = msg[msg_frag->page_offset+3];
+
+ } else {
+ type = *(skb->data + 2);
+ subtype = *(skb->data + 3);
+ }
+
+ if (type != pn->resource_type)
+ continue;
+
+ if (subtype != pn->resource_subtype) {
+ /* printk("phonet resource subtype
+ don't match! type %d,
+ subtype %d pn->resource_subtype
+ %d\n", type, subtype,
+ pn->resource_subtype); */
+ continue;
+ }
+ /* printk("phonet resource match! type %d,
+ subtype %d\n", type, subtype); */
+ }
+
+ if (pn_port(obj)) {
+ /* Look up socket by port */
+ if (pn_port(pn->sobject) != pn_port(obj))
+ continue;
+ } else {
+ /* If port is zero, look up by resource */
+ if (pn->resource != res)
+ continue;
+ }
+
+ if (pn_addr(pn->sobject) &&
+ pn_addr(pn->sobject) != pn_addr(obj))
+ continue;
+
+ rval = sknode;
+ sock_hold(sknode);
+ goto out;
+ }
+ hlist++;
+ }
+
+out:
+ rcu_read_unlock();
+
+ return rval;
+}
+
+/*
+ * Find address based on socket address, match only certain fields.
+ * Also grab sock if it was found. Remember to sock_put it later.
+ */
struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn)
{
struct sock *sknode;
@@ -369,6 +456,33 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
+ printk(KERN_DEBUG "pn_socket_ioctl with %s\n",
+ (cmd == SIOCCONFIGTYPE) ? "SIOCCONFIGTYPE" :
+ (cmd == SIOCCONFIGSUBTYPE) ? "SIOCCONFIGSUBTYPE" : "OTHER");
+
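+ /* SIOCCONFIGTYPE/SIOCCONFIGSUBTYPE record the ISI resource
+ * type/subtype this socket is interested in;
+ * pn_find_sock_by_sa_and_skb() uses them to steer matching
+ * messages to this socket. */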
+ if (cmd == SIOCCONFIGTYPE) {
+ u16 type;
+ if (get_user(type, (__u16 __user *)arg))
+ return -EFAULT;
+
+ printk(KERN_DEBUG "Phonet register resource type %d on %p\n",
+ type, pn);
+ pn->resource_type = type;
+ return 0;
+ }
+
+ if (cmd == SIOCCONFIGSUBTYPE) {
+ u16 subtype;
+
+ if (get_user(subtype, (__u16 __user *)arg))
+ return -EFAULT;
+
+ pn->resource_subtype = subtype;
+ /* printk(KERN_DEBUG "Phonet register resource subtype %d on %x
+ \n", subtype, pn); */
+ return 0;
+ }
+
if (cmd == SIOCPNGETOBJECT) {
struct net_device *dev;
u16 handle;
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 78efe895b663..8e12c8a2b82b 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -10,6 +10,11 @@ menuconfig RFKILL
To compile this driver as a module, choose M here: the
module will be called rfkill.
+config RFKILL_PM
+ bool "Power off on suspend"
+ depends on RFKILL && PM
+ default y
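+ help
+ If enabled, rfkill devices are powered down while the system
+ is suspended and restored on resume.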
+
# LED trigger support
config RFKILL_LEDS
bool
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 1cec5e4f3a5e..c099b4fffd93 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -792,6 +792,7 @@ void rfkill_pause_polling(struct rfkill *rfkill)
}
EXPORT_SYMBOL(rfkill_pause_polling);
+#ifdef CONFIG_RFKILL_PM
void rfkill_resume_polling(struct rfkill *rfkill)
{
BUG_ON(!rfkill);
@@ -826,14 +827,17 @@ static int rfkill_resume(struct device *dev)
return 0;
}
+#endif
static struct class rfkill_class = {
.name = "rfkill",
.dev_release = rfkill_release,
.dev_attrs = rfkill_dev_attrs,
.dev_uevent = rfkill_dev_uevent,
+#ifdef CONFIG_RFKILL_PM
.suspend = rfkill_suspend,
.resume = rfkill_resume,
+#endif
};
bool rfkill_blocked(struct rfkill *rfkill)
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index fb076cd6f808..d1cc03d211d8 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -57,10 +57,10 @@ static int rfkill_gpio_set_power(void *data, bool blocked)
if (gpio_is_valid(rfkill->pdata->reset_gpio))
gpio_direction_output(rfkill->pdata->reset_gpio, 0);
if (rfkill->pwr_clk && PWR_CLK_ENABLED(rfkill))
- clk_disable(rfkill->pwr_clk);
+ clk_disable_unprepare(rfkill->pwr_clk);
} else {
if (rfkill->pwr_clk && PWR_CLK_DISABLED(rfkill))
- clk_enable(rfkill->pwr_clk);
+ clk_prepare_enable(rfkill->pwr_clk);
if (gpio_is_valid(rfkill->pdata->reset_gpio))
gpio_direction_output(rfkill->pdata->reset_gpio, 1);
if (gpio_is_valid(rfkill->pdata->shutdown_gpio))
@@ -204,7 +204,7 @@ static int rfkill_gpio_remove(struct platform_device *pdev)
if (gpio_is_valid(rfkill->pdata->reset_gpio))
gpio_free(rfkill->pdata->reset_gpio);
if (rfkill->pwr_clk && PWR_CLK_ENABLED(rfkill))
- clk_disable(rfkill->pwr_clk);
+ clk_disable_unprepare(rfkill->pwr_clk);
if (rfkill->pwr_clk)
clk_put(rfkill->pwr_clk);
kfree(rfkill->shutdown_name);
diff --git a/net/socket.c b/net/socket.c
index fc90b4f0da3c..e85c4ec47a13 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -56,6 +56,8 @@
* paradigm.
*
* Based upon Swansea University Computer Society NET3.039
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
*/
#include <linux/mm.h>
@@ -914,6 +916,7 @@ static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+ SOCK_INODE(sock)->i_private = get_thread_process(current);
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}
@@ -1149,6 +1152,7 @@ static unsigned int sock_poll(struct file *file, poll_table *wait)
* We can't return errors to poll, so it's either yes or no.
*/
sock = file->private_data;
+ SOCK_INODE(sock)->i_private = get_thread_process(current);
return sock->ops->poll(file, sock, wait);
}
@@ -1385,6 +1389,7 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
if (retval < 0)
goto out_release;
+ SOCK_INODE(sock)->i_private = get_thread_process(current);
out:
/* It may be already another descriptor 8) Not kernel problem. */
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5356b120dbf8..77d251e02593 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -254,7 +254,7 @@ static int rpc_wait_bit_killable(void *word)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
- freezable_schedule();
+ freezable_schedule_unsafe();
return 0;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 75e198d029d2..c80c107139f1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -114,6 +114,7 @@
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
+#include <linux/freezer.h>
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
@@ -1896,7 +1897,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
unix_state_unlock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = freezable_schedule_timeout(timeo);
unix_state_lock(sk);
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
}
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 16d08b399210..4c602d100480 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -166,3 +166,14 @@ config LIB80211_DEBUG
from lib80211.
If unsure, say N.
+
+config CFG80211_ALLOW_RECONNECT
+ bool "Allow reconnect while already connected"
+ depends on CFG80211
+ default n
+ help
+ The cfg80211 stack normally refuses to connect while already
+ connected. This option allows a new connection attempt in that case.
+
+ Select this option ONLY for wlan drivers that are specifically
+ built for such purposes.
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 64fcbae020d2..97accb853423 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -412,6 +412,10 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
const struct ieee80211_iface_combination *c;
int i, j;
+ /* If we have combinations, enforce them */
+ if (wiphy->n_iface_combinations)
+ wiphy->flags |= WIPHY_FLAG_ENFORCE_COMBINATIONS;
+
for (i = 0; i < wiphy->n_iface_combinations; i++) {
u32 cnt = 0;
u16 all_iftypes = 0;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 448c034184e2..b145256392be 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -289,6 +289,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 },
[NL80211_ATTR_IE] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
+#ifdef CONFIG_MAC80211_SCAN_ABORT
+ [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
+#endif
[NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED },
[NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED },
@@ -5241,6 +5244,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
request->no_cck =
nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
+#ifdef CONFIG_MAC80211_SCAN_ABORT
+ if (info->attrs[NL80211_ATTR_SCAN_FLAGS])
+ request->flags = nla_get_u32(
+ info->attrs[NL80211_ATTR_SCAN_FLAGS]);
+#endif
+
request->wdev = wdev;
request->wiphy = &rdev->wiphy;
request->scan_start = jiffies;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 81019ee3ddc8..4db2177a69ea 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -55,7 +55,7 @@
* also linked into the probe response struct.
*/
-#define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ)
+#define IEEE80211_SCAN_RESULT_EXPIRE (3 * HZ)
static void bss_free(struct cfg80211_internal_bss *bss)
{
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 3ed35c345cae..e2f74e66a169 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -707,8 +707,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
return;
+#ifndef CONFIG_CFG80211_ALLOW_RECONNECT
if (wdev->sme_state != CFG80211_SME_CONNECTED)
return;
+#endif
if (wdev->current_bss) {
cfg80211_unhold_bss(wdev->current_bss);
@@ -785,10 +787,14 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
ASSERT_WDEV_LOCK(wdev);
+#ifndef CONFIG_CFG80211_ALLOW_RECONNECT
if (wdev->sme_state != CFG80211_SME_IDLE)
return -EALREADY;
if (WARN_ON(wdev->connect_keys)) {
+#else
+ if (wdev->connect_keys) {
+#endif
kfree(wdev->connect_keys);
wdev->connect_keys = NULL;
}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index f5ad4d94ba88..6b0ccfcec092 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1257,6 +1257,13 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
return 0;
}
+ /*
+ * Drivers will gradually all set this flag; until they all
+ * do, we only enforce combinations for those that set it.
+ */
+ if (!(rdev->wiphy.flags & WIPHY_FLAG_ENFORCE_COMBINATIONS))
+ return 0;
+
memset(num, 0, sizeof(num));
memset(used_channels, 0, sizeof(used_channels));