From f887cc48c6464af20058e81544e73e33daacbd52 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 2 Oct 2012 11:20:02 -0700
Subject: net/tipc: remove depends on CONFIG_EXPERIMENTAL

The CONFIG_EXPERIMENTAL config item has not carried much meaning for a
while now and is almost always enabled by default. As agreed during the
Linux kernel summit, remove it from any "depends on" lines in Kconfigs.

CC: Jon Maloy
CC: Allan Stephens
CC: "David S. Miller"
Signed-off-by: Kees Cook
Acked-by: David S. Miller
---
 net/tipc/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index bc41bd31eadc..4f99600a5fed 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -3,8 +3,8 @@
 #
 
 menuconfig TIPC
-	tristate "The TIPC Protocol (EXPERIMENTAL)"
-	depends on INET && EXPERIMENTAL
+	tristate "The TIPC Protocol"
+	depends on INET
 	---help---
 	  The Transparent Inter Process Communication (TIPC) protocol is
 	  specially designed for intra cluster communication. This protocol
--
cgit v1.2.3

From c5c73dca596894c47760e4e955877b731ffabf57 Mon Sep 17 00:00:00 2001
From: Erik Hugne
Date: Thu, 14 Feb 2013 14:43:33 +0000
Subject: tipc: fix missing spinlock init in broadcast code

After commit 3c294cb3 "tipc: remove the bearer congestion mechanism",
we try to grab the broadcast bearer lock when sending multicast
messages over the broadcast link. This will cause an oops because the
lock is never initialized. This is an old bug, but the lock was never
actually used before commit 3c294cb3, which is why it was not visible
until now. The oops will look something like:

BUG: spinlock bad magic on CPU#2, daemon/147
 lock: bcast_bearer+0x48/0xffffffffffffd19a [tipc],
 .magic: 00000000, .owner: /-1, .owner_cpu: 0
Pid: 147, comm: daemon Not tainted 3.8.0-rc3+ #206
Call Trace:
 spin_dump+0x8a/0x8f
 spin_bug+0x21/0x26
 do_raw_spin_lock+0x114/0x150
 _raw_spin_lock_bh+0x19/0x20
 tipc_bearer_blocked+0x1f/0x40 [tipc]
 tipc_link_send_buf+0x82/0x280 [tipc]
 ? __alloc_skb+0x9f/0x2b0
 tipc_bclink_send_msg+0x77/0xa0 [tipc]
 tipc_multicast+0x11b/0x1b0 [tipc]
 send_msg+0x225/0x530 [tipc]
 sock_sendmsg+0xca/0xe0

The above can be triggered by running the multicast demo program.

Signed-off-by: Erik Hugne
Signed-off-by: Paul Gortmaker
Signed-off-by: David S. Miller
---
 net/tipc/bcast.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/tipc')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 54f89f90ac33..2655c9f4ecad 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -774,6 +774,7 @@ void tipc_bclink_init(void)
 	bcl->owner = &bclink->node;
 	bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
 	tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
+	spin_lock_init(&bcbearer->bearer.lock);
 	bcl->b_ptr = &bcbearer->bearer;
 	bcl->state = WORKING_WORKING;
 	strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
--
cgit v1.2.3
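
A note on the splat above: with CONFIG_DEBUG_SPINLOCK enabled,
spin_lock_init() stamps a lock with a magic value, and a lock that was
merely zeroed (static storage, kzalloc) fails that check on first
acquisition, which is exactly what ".magic: 00000000" in the report
means. A minimal sketch of the pattern the fix applies; the struct and
function names below are illustrative, not TIPC's:

	#include <linux/spinlock.h>

	struct demo_bearer {
		spinlock_t lock;
		/* ... */
	};

	/* Static storage is zeroed, but a zeroed spinlock_t is not a
	 * validly initialized lock when CONFIG_DEBUG_SPINLOCK is on.
	 */
	static struct demo_bearer demo;

	static void demo_init(void)
	{
		spin_lock_init(&demo.lock);	/* the step the broadcast code was missing */
	}

	static void demo_send(void)
	{
		spin_lock_bh(&demo.lock);	/* "bad magic" oops here if init was skipped */
		/* ... */
		spin_unlock_bh(&demo.lock);
	}
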
From 57467e56293796f780e91a24600a732516f534ac Mon Sep 17 00:00:00 2001
From: Ying Xue
Date: Sun, 20 Jan 2013 23:30:08 +0100
Subject: tipc: eliminate duplicated discard_rx_queue routine

The tipc function discard_rx_queue() is just a duplicated
implementation of __skb_queue_purge(). Remove the former and directly
invoke __skb_queue_purge(). In doing so, the underscores convey to the
code reader more information about the locking state that is assumed.

Signed-off-by: Ying Xue
Signed-off-by: Jon Maloy
Signed-off-by: Paul Gortmaker
---
 net/tipc/socket.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9b4e4833a484..f6ceecd44749 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -128,19 +128,6 @@ static void advance_rx_queue(struct sock *sk)
 	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
 }
 
-/**
- * discard_rx_queue - discard all buffers in socket receive queue
- *
- * Caller must hold socket lock
- */
-static void discard_rx_queue(struct sock *sk)
-{
-	struct sk_buff *buf;
-
-	while ((buf = __skb_dequeue(&sk->sk_receive_queue)))
-		kfree_skb(buf);
-}
-
 /**
  * reject_rx_queue - reject all buffers in socket receive queue
  *
@@ -292,7 +279,7 @@ static int release(struct socket *sock)
 	res = tipc_deleteport(tport->ref);
 
 	/* Discard any remaining (connection-based) messages in receive queue */
-	discard_rx_queue(sk);
+	__skb_queue_purge(&sk->sk_receive_queue);
 
 	/* Reject any messages that accumulated in backlog queue */
 	sock->state = SS_DISCONNECTING;
@@ -1637,7 +1624,7 @@ restart:
 	case SS_DISCONNECTING:
 
 		/* Discard any unreceived messages */
-		discard_rx_queue(sk);
+		__skb_queue_purge(&sk->sk_receive_queue);
 
 		/* Wake up anyone sleeping in poll */
 		sk->sk_state_change(sk);
--
cgit v1.2.3
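
The underscore prefix follows the usual sk_buff queue convention:
__skb_queue_purge() leaves locking to the caller (here the socket lock
taken via lock_sock()), whereas skb_queue_purge() acquires the queue's
internal spinlock itself. A short sketch of the two call patterns; the
helper names are illustrative:

	#include <linux/skbuff.h>
	#include <net/sock.h>

	/* Caller already holds the socket lock, so the lock-free variant
	 * is safe and avoids taking queue->lock a second time.
	 */
	static void demo_flush_locked(struct sock *sk)
	{
		__skb_queue_purge(&sk->sk_receive_queue);
	}

	/* No lock held: use the variant that grabs queue->lock itself. */
	static void demo_flush_unlocked(struct sk_buff_head *q)
	{
		skb_queue_purge(q);
	}
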
From aba79f332f46ca8529f3e62a9fc2926c8fe75e44 Mon Sep 17 00:00:00 2001
From: Ying Xue
Date: Sun, 20 Jan 2013 23:30:09 +0100
Subject: tipc: byte-based overload control on socket receive queue

Change overload control to be purely byte-based, using
sk->sk_rmem_alloc as the byte counter, and compare it to a calculated
upper limit for the socket receive queue.

For all connection messages, irrespective of message importance, the
overload limit is set to a constant value (i.e. 67MB). This limit
should normally never be reached because of the lower limit used by
the flow control algorithm, and is there only as a last resort in case
a faulty peer doesn't respect the send window limit.

For datagram messages, message importance is taken into account when
calculating the overload limit. The calculation is based on
sk->sk_rcvbuf, and is hence configurable via the socket option
SO_RCVBUF.

Cc: Neil Horman
Signed-off-by: Ying Xue
Signed-off-by: Jon Maloy
Signed-off-by: Paul Gortmaker
---
 net/tipc/socket.c | 77 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 39 insertions(+), 38 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f6ceecd44749..cbe2f6ecf07a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -43,7 +43,8 @@
 #define SS_LISTENING	-1	/* socket is listening */
 #define SS_READY	-2	/* socket is connectionless */
 
-#define OVERLOAD_LIMIT_BASE	10000
+#define CONN_OVERLOAD_LIMIT	((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
+				SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
 #define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
 
 struct tipc_sock {
@@ -202,7 +203,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
 
 	sock_init_data(sock, sk);
 	sk->sk_backlog_rcv = backlog_rcv;
-	sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2;
 	sk->sk_data_ready = tipc_data_ready;
 	sk->sk_write_space = tipc_write_space;
 	tipc_sk(sk)->p = tp_ptr;
@@ -1141,34 +1141,6 @@ static void tipc_data_ready(struct sock *sk, int len)
 	rcu_read_unlock();
 }
 
-/**
- * rx_queue_full - determine if receive queue can accept another message
- * @msg: message to be added to queue
- * @queue_size: current size of queue
- * @base: nominal maximum size of queue
- *
- * Returns 1 if queue is unable to accept message, 0 otherwise
- */
-static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
-{
-	u32 threshold;
-	u32 imp = msg_importance(msg);
-
-	if (imp == TIPC_LOW_IMPORTANCE)
-		threshold = base;
-	else if (imp == TIPC_MEDIUM_IMPORTANCE)
-		threshold = base * 2;
-	else if (imp == TIPC_HIGH_IMPORTANCE)
-		threshold = base * 100;
-	else
-		return 0;
-
-	if (msg_connected(msg))
-		threshold *= 4;
-
-	return queue_size >= threshold;
-}
-
 /**
  * filter_connect - Handle all incoming messages for a connection-based socket
  * @tsock: TIPC socket
@@ -1246,6 +1218,36 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
 	return retval;
 }
 
+/**
+ * rcvbuf_limit - get proper overload limit of socket receive queue
+ * @sk: socket
+ * @buf: message
+ *
+ * For all connection oriented messages, irrespective of importance,
+ * the default overload value (i.e. 67MB) is set as limit.
+ *
+ * For all connectionless messages, by default new queue limits are
+ * as belows:
+ *
+ * TIPC_LOW_IMPORTANCE       (5MB)
+ * TIPC_MEDIUM_IMPORTANCE    (10MB)
+ * TIPC_HIGH_IMPORTANCE      (20MB)
+ * TIPC_CRITICAL_IMPORTANCE  (40MB)
+ *
+ * Returns overload limit according to corresponding message importance
+ */
+static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	unsigned int limit;
+
+	if (msg_connected(msg))
+		limit = CONN_OVERLOAD_LIMIT;
+	else
+		limit = sk->sk_rcvbuf << (msg_importance(msg) + 5);
+	return limit;
+}
+
 /**
  * filter_rcv - validate incoming message
  * @sk: socket
@@ -1262,7 +1264,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 {
 	struct socket *sock = sk->sk_socket;
 	struct tipc_msg *msg = buf_msg(buf);
-	u32 recv_q_len;
+	unsigned int limit = rcvbuf_limit(sk, buf);
 	u32 res = TIPC_OK;
 
 	/* Reject message if it is wrong sort of message for socket */
@@ -1279,15 +1281,13 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 	}
 
 	/* Reject message if there isn't room to queue it */
-	recv_q_len = skb_queue_len(&sk->sk_receive_queue);
-	if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) {
-		if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2))
-			return TIPC_ERR_OVERLOAD;
-	}
+	if (sk_rmem_alloc_get(sk) + buf->truesize >= limit)
+		return TIPC_ERR_OVERLOAD;
 
-	/* Enqueue message (finally!) */
+	/* Enqueue message */
 	TIPC_SKB_CB(buf)->handle = 0;
 	__skb_queue_tail(&sk->sk_receive_queue, buf);
+	skb_set_owner_r(buf, sk);
 
 	sk->sk_data_ready(sk, 0);
 	return TIPC_OK;
@@ -1336,7 +1336,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
 	if (!sock_owned_by_user(sk)) {
 		res = filter_rcv(sk, buf);
 	} else {
-		if (sk_add_backlog(sk, buf, sk->sk_rcvbuf))
+		if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf)))
 			res = TIPC_ERR_OVERLOAD;
 		else
 			res = TIPC_OK;
@@ -1570,6 +1570,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
 	} else {
 		__skb_dequeue(&sk->sk_receive_queue);
 		__skb_queue_head(&new_sk->sk_receive_queue, buf);
+		skb_set_owner_r(buf, new_sk);
 	}
 	release_sock(new_sk);
--
cgit v1.2.3
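
The numbers in the new rcvbuf_limit() comment can be checked with a
little arithmetic. CONN_OVERLOAD_LIMIT expands to (512 * 2 + 1) *
SKB_TRUESIZE(66000), i.e. 1025 maximum-size messages including skb
overhead, which lands in the neighborhood of the 67MB quoted above.
The datagram ladder comes from sk->sk_rcvbuf << (importance + 5): with
an assumed default sk_rcvbuf of 160KB that is exactly 5/10/20/40MB for
the four importance levels. A hedged userspace sketch of the same
calculation (the SKB_TRUESIZE overhead and the default rcvbuf value
are approximations, not values taken from this tree):

	#include <stdio.h>

	/* Constants as defined in the TIPC headers of this tree. */
	#define TIPC_FLOW_CONTROL_WIN   512
	#define TIPC_MAX_USER_MSG_SIZE  66000

	/* Stand-in for the kernel's SKB_TRUESIZE(); the real macro adds
	 * struct sk_buff plus skb_shared_info overhead (a few hundred bytes).
	 */
	#define SKB_TRUESIZE(sz)        ((sz) + 512)

	int main(void)
	{
		unsigned long conn = (TIPC_FLOW_CONTROL_WIN * 2 + 1) *
				     (unsigned long)SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE);
		unsigned long rcvbuf = 160 * 1024;	/* assumed default sk_rcvbuf */

		printf("connection limit: ~%lu MB\n", conn / 1000000);	/* ~68, the "67MB" */
		for (int imp = 0; imp <= 3; imp++)	/* LOW .. CRITICAL importance */
			printf("datagram imp %d: %lu MB\n",
			       imp, (rcvbuf << (imp + 5)) >> 20);	/* 5, 10, 20, 40 */
		return 0;
	}
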
From 97f8b87e9108485a0b7070645662253561304458 Mon Sep 17 00:00:00 2001
From: Ying Xue
Date: Thu, 31 Jan 2013 21:51:47 +0100
Subject: tipc: remove redundant checking for the number of iovecs in a send request

As the number of iovecs in a send request is already limited within
UIO_MAXIOV (i.e. 1024) in __sys_sendmsg(), it's unnecessary to check
it again in the TIPC stack.

Signed-off-by: Ying Xue
Signed-off-by: Jon Maloy
Signed-off-by: Paul Gortmaker
---
 net/tipc/socket.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index cbe2f6ecf07a..a9622b6cd916 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -503,8 +503,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 	if (unlikely((m->msg_namelen < sizeof(*dest)) ||
 		     (dest->family != AF_TIPC)))
 		return -EINVAL;
-	if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
-	    (m->msg_iovlen > (unsigned int)INT_MAX))
+	if (total_len > TIPC_MAX_USER_MSG_SIZE)
 		return -EMSGSIZE;
 
 	if (iocb)
@@ -612,8 +611,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
 	if (unlikely(dest))
 		return send_msg(iocb, sock, m, total_len);
 
-	if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
-	    (m->msg_iovlen > (unsigned int)INT_MAX))
+	if (total_len > TIPC_MAX_USER_MSG_SIZE)
 		return -EMSGSIZE;
 
 	if (iocb)
@@ -698,8 +696,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
 		goto exit;
 	}
 
-	if ((total_len > (unsigned int)INT_MAX) ||
-	    (m->msg_iovlen > (unsigned int)INT_MAX)) {
+	if (total_len > (unsigned int)INT_MAX) {
 		res = -EMSGSIZE;
 		goto exit;
 	}
--
cgit v1.2.3
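
For reference, the generic check lives in the socket layer:
__sys_sendmsg() rejects any sendmsg() call whose msg_iovlen exceeds
UIO_MAXIOV (exposed to userspace as IOV_MAX, 1024 on Linux) with
EMSGSIZE before any protocol code runs, so the per-protocol check
above could never fire. A small userspace sketch demonstrating this
under that assumption; the socket family is arbitrary, since the count
check happens before the message is even looked at:

	#include <stdio.h>
	#include <errno.h>
	#include <string.h>
	#include <limits.h>
	#include <sys/socket.h>
	#include <sys/uio.h>

	int main(void)
	{
		int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
		struct iovec iov[1];	/* contents irrelevant: the count check fires first */
		struct msghdr mh;

		memset(&mh, 0, sizeof(mh));
		mh.msg_iov = iov;
		mh.msg_iovlen = IOV_MAX + 1;	/* IOV_MAX == UIO_MAXIOV == 1024 */

		if (sendmsg(fd, &mh, 0) < 0)
			printf("sendmsg: %s\n", strerror(errno));	/* EMSGSIZE */
		return 0;
	}
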
From b67bfe0d42cac56c512dd5da4b1b347a23f4b70a Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Wed, 27 Feb 2013 17:06:00 -0800
Subject: hlist: drop the node parameter from iterators

I'm not sure why, but the hlist for each entry iterators were conceived
differently from the list ones. While the list ones are nice and
elegant:

	list_for_each_entry(pos, head, member)

The hlist ones were greedy and wanted an extra parameter:

	hlist_for_each_entry(tpos, pos, head, member)

Why did they need an extra pos parameter? I'm not quite sure. Not only
do they not really need it, it also prevents the iterator from looking
exactly like the list iterator, which is unfortunate.

Besides the semantic patch, there was some manual work required:

 - Fix up the actual hlist iterators in linux/list.h
 - Fix up the declaration of other iterators based on the hlist ones.
 - A very small amount of places were using the 'node' parameter, this
   was modified to use 'obj->member' instead.
 - Coccinelle didn't handle the hlist_for_each_entry_safe iterator
   properly, so those had to be fixed up manually.

The semantic patch which is mostly the work of Peter Senna Tschudin is
here:

@@
iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host;
type T;
expression a,c,d,e;
identifier b;
statement S;
@@

-T b;
    <+... when != b
(
hlist_for_each_entry(a,
- b,
c, d) S
|
hlist_for_each_entry_continue(a,
- b,
c) S
|
hlist_for_each_entry_from(a,
- b,
c) S
|
hlist_for_each_entry_rcu(a,
- b,
c, d) S
|
hlist_for_each_entry_rcu_bh(a,
- b,
c, d) S
|
hlist_for_each_entry_continue_rcu_bh(a,
- b,
c) S
|
for_each_busy_worker(a, c,
- b,
d) S
|
ax25_uid_for_each(a,
- b,
c) S
|
ax25_for_each(a,
- b,
c) S
|
inet_bind_bucket_for_each(a,
- b,
c) S
|
sctp_for_each_hentry(a,
- b,
c) S
|
sk_for_each(a,
- b,
c) S
|
sk_for_each_rcu(a,
- b,
c) S
|
sk_for_each_from
-(a, b)
+(a)
S
+ sk_for_each_from(a) S
|
sk_for_each_safe(a,
- b,
c, d) S
|
sk_for_each_bound(a,
- b,
c) S
|
hlist_for_each_entry_safe(a,
- b,
c, d, e) S
|
hlist_for_each_entry_continue_rcu(a,
- b,
c) S
|
nr_neigh_for_each(a,
- b,
c) S
|
nr_neigh_for_each_safe(a,
- b,
c, d) S
|
nr_node_for_each(a,
- b,
c) S
|
nr_node_for_each_safe(a,
- b,
c, d) S
|
- for_each_gfn_sp(a, c, d, b) S
+ for_each_gfn_sp(a, c, d) S
|
- for_each_gfn_indirect_valid_sp(a, c, d, b) S
+ for_each_gfn_indirect_valid_sp(a, c, d) S
|
for_each_host(a,
- b,
c) S
|
for_each_host_safe(a,
- b,
c, d) S
|
for_each_mesh_entry(a,
- b,
c, d) S
)
    ...+>

[akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c]
[akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c]
[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: fix warnings]
[akpm@linux-foudnation.org: redo intrusive kvm changes]
Tested-by: Peter Senna Tschudin
Acked-by: Paul E. McKenney
Signed-off-by: Sasha Levin
Cc: Wu Fengguang
Cc: Marcelo Tosatti
Cc: Gleb Natapov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 net/tipc/name_table.c | 8 +++-----
 net/tipc/node.c       | 3 +--
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 46754779fd3d..24b167914311 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -473,11 +473,10 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 static struct name_seq *nametbl_find_seq(u32 type)
 {
 	struct hlist_head *seq_head;
-	struct hlist_node *seq_node;
 	struct name_seq *ns;
 
 	seq_head = &table.types[hash(type)];
-	hlist_for_each_entry(ns, seq_node, seq_head, ns_list) {
+	hlist_for_each_entry(ns, seq_head, ns_list) {
 		if (ns->type == type)
 			return ns;
 	}
@@ -853,7 +852,6 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 			u32 type, u32 lowbound, u32 upbound)
 {
 	struct hlist_head *seq_head;
-	struct hlist_node *seq_node;
 	struct name_seq *seq;
 	int all_types;
 	int ret = 0;
@@ -873,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 			upbound = ~0;
 		for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
 			seq_head = &table.types[i];
-			hlist_for_each_entry(seq, seq_node, seq_head, ns_list) {
+			hlist_for_each_entry(seq, seq_head, ns_list) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						    depth, seq->type,
 						    lowbound, upbound, i);
@@ -889,7 +887,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 		ret += nametbl_header(buf + ret, len - ret, depth);
 		i = hash(type);
 		seq_head = &table.types[i];
-		hlist_for_each_entry(seq, seq_node, seq_head, ns_list) {
+		hlist_for_each_entry(seq, seq_head, ns_list) {
 			if (seq->type == type) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						    depth, type,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 48f39dd3eae8..6e6c434872e8 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -69,12 +69,11 @@ static unsigned int tipc_hashfn(u32 addr)
 struct tipc_node *tipc_node_find(u32 addr)
 {
 	struct tipc_node *node;
-	struct hlist_node *pos;
 
 	if (unlikely(!in_own_cluster_exact(addr)))
 		return NULL;
-	hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) {
+	hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) {
 		if (node->addr == addr)
 			return node;
 	}
--
cgit v1.2.3
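
To make the API change concrete: before this commit the hlist iterator
needed a scratch struct hlist_node * cursor alongside the typed entry
pointer; afterwards it walks entries directly, matching
list_for_each_entry(). A minimal sketch with an illustrative struct,
not taken from the TIPC code:

	#include <linux/list.h>
	#include <linux/types.h>

	struct demo_entry {
		int key;
		struct hlist_node link;
	};

	static struct demo_entry *demo_find(struct hlist_head *head, int key)
	{
		struct demo_entry *e;

		/* Old form (before this commit) required a cursor:
		 *
		 *	struct hlist_node *pos;
		 *	hlist_for_each_entry(e, pos, head, link) { ... }
		 */
		hlist_for_each_entry(e, head, link) {	/* new form: entry, head, member */
			if (e->key == key)
				return e;
		}
		return NULL;
	}
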