From f887cc48c6464af20058e81544e73e33daacbd52 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 2 Oct 2012 11:20:02 -0700
Subject: net/tipc: remove depends on CONFIG_EXPERIMENTAL

The CONFIG_EXPERIMENTAL config item has not carried much meaning for a
while now and is almost always enabled by default. As agreed during the
Linux kernel summit, remove it from any "depends on" lines in Kconfigs.

CC: Jon Maloy
CC: Allan Stephens
CC: "David S. Miller"
Signed-off-by: Kees Cook
Acked-by: David S. Miller
---
 net/tipc/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index bc41bd31eadc..4f99600a5fed 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -3,8 +3,8 @@
 #
 
 menuconfig TIPC
-	tristate "The TIPC Protocol (EXPERIMENTAL)"
-	depends on INET && EXPERIMENTAL
+	tristate "The TIPC Protocol"
+	depends on INET
 	---help---
 	  The Transparent Inter Process Communication (TIPC) protocol is
 	  specially designed for intra cluster communication. This protocol
--
cgit v1.2.3

From c5c73dca596894c47760e4e955877b731ffabf57 Mon Sep 17 00:00:00 2001
From: Erik Hugne
Date: Thu, 14 Feb 2013 14:43:33 +0000
Subject: tipc: fix missing spinlock init in broadcast code

After commit 3c294cb3 "tipc: remove the bearer congestion mechanism",
we try to grab the broadcast bearer lock when sending multicast
messages over the broadcast link. This will cause an oops because the
lock is never initialized. This is an old bug, but the lock was never
actually used before commit 3c294cb3, which is why it was not visible
until now. The oops will look something like:

BUG: spinlock bad magic on CPU#2, daemon/147
 lock: bcast_bearer+0x48/0xffffffffffffd19a [tipc],
 .magic: 00000000, .owner: /-1, .owner_cpu: 0
Pid: 147, comm: daemon Not tainted 3.8.0-rc3+ #206
Call Trace:
 spin_dump+0x8a/0x8f
 spin_bug+0x21/0x26
 do_raw_spin_lock+0x114/0x150
 _raw_spin_lock_bh+0x19/0x20
 tipc_bearer_blocked+0x1f/0x40 [tipc]
 tipc_link_send_buf+0x82/0x280 [tipc]
 ? __alloc_skb+0x9f/0x2b0
 tipc_bclink_send_msg+0x77/0xa0 [tipc]
 tipc_multicast+0x11b/0x1b0 [tipc]
 send_msg+0x225/0x530 [tipc]
 sock_sendmsg+0xca/0xe0

The above can be triggered by running the multicast demo program.

Signed-off-by: Erik Hugne
Signed-off-by: Paul Gortmaker
Signed-off-by: David S. Miller
---
 net/tipc/bcast.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/tipc')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 54f89f90ac33..2655c9f4ecad 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -774,6 +774,7 @@ void tipc_bclink_init(void)
 	bcl->owner = &bclink->node;
 	bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
 	tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
+	spin_lock_init(&bcbearer->bearer.lock);
 	bcl->b_ptr = &bcbearer->bearer;
 	bcl->state = WORKING_WORKING;
 	strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
--
cgit v1.2.3
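
A note on the splat above: with CONFIG_DEBUG_SPINLOCK enabled,
spin_lock_init() stamps a lock with a magic value, and a lock that was
merely zeroed (static storage, kzalloc) fails that check on first
acquisition, which is exactly what ".magic: 00000000" in the report
means. A minimal sketch of the pattern the fix applies; the struct and
function names below are illustrative, not TIPC's:

	#include <linux/spinlock.h>

	struct demo_bearer {
		spinlock_t lock;
		/* ... */
	};

	/* Static storage is zeroed, but a zeroed spinlock_t is not a
	 * validly initialized lock when CONFIG_DEBUG_SPINLOCK is on.
	 */
	static struct demo_bearer demo;

	static void demo_init(void)
	{
		spin_lock_init(&demo.lock);	/* the step the broadcast code was missing */
	}

	static void demo_send(void)
	{
		spin_lock_bh(&demo.lock);	/* "bad magic" oops here if init was skipped */
		/* ... */
		spin_unlock_bh(&demo.lock);
	}
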
From 57467e56293796f780e91a24600a732516f534ac Mon Sep 17 00:00:00 2001
From: Ying Xue
Date: Sun, 20 Jan 2013 23:30:08 +0100
Subject: tipc: eliminate duplicated discard_rx_queue routine

The tipc function discard_rx_queue() is just a duplicated
implementation of __skb_queue_purge(). Remove the former and directly
invoke __skb_queue_purge(). In doing so, the underscores convey to the
code reader more information about the locking state that is assumed.

Signed-off-by: Ying Xue
Signed-off-by: Jon Maloy
Signed-off-by: Paul Gortmaker
---
 net/tipc/socket.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9b4e4833a484..f6ceecd44749 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -128,19 +128,6 @@ static void advance_rx_queue(struct sock *sk)
 	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
 }
 
-/**
- * discard_rx_queue - discard all buffers in socket receive queue
- *
- * Caller must hold socket lock
- */
-static void discard_rx_queue(struct sock *sk)
-{
-	struct sk_buff *buf;
-
-	while ((buf = __skb_dequeue(&sk->sk_receive_queue)))
-		kfree_skb(buf);
-}
-
 /**
  * reject_rx_queue - reject all buffers in socket receive queue
  *
@@ -292,7 +279,7 @@ static int release(struct socket *sock)
 	res = tipc_deleteport(tport->ref);
 
 	/* Discard any remaining (connection-based) messages in receive queue */
-	discard_rx_queue(sk);
+	__skb_queue_purge(&sk->sk_receive_queue);
 
 	/* Reject any messages that accumulated in backlog queue */
 	sock->state = SS_DISCONNECTING;
@@ -1637,7 +1624,7 @@ restart:
 	case SS_DISCONNECTING:
 
 		/* Discard any unreceived messages */
-		discard_rx_queue(sk);
+		__skb_queue_purge(&sk->sk_receive_queue);
 
 		/* Wake up anyone sleeping in poll */
 		sk->sk_state_change(sk);
--
cgit v1.2.3
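
The underscore prefix follows the usual sk_buff queue convention:
__skb_queue_purge() leaves locking to the caller (here the socket lock
taken via lock_sock()), whereas skb_queue_purge() acquires the queue's
internal spinlock itself. A short sketch of the two call patterns; the
helper names are illustrative:

	#include <linux/skbuff.h>
	#include <net/sock.h>

	/* Caller already holds the socket lock, so the lock-free variant
	 * is safe and avoids taking queue->lock a second time.
	 */
	static void demo_flush_locked(struct sock *sk)
	{
		__skb_queue_purge(&sk->sk_receive_queue);
	}

	/* No lock held: use the variant that grabs queue->lock itself. */
	static void demo_flush_unlocked(struct sk_buff_head *q)
	{
		skb_queue_purge(q);
	}
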
From aba79f332f46ca8529f3e62a9fc2926c8fe75e44 Mon Sep 17 00:00:00 2001
From: Ying Xue
Date: Sun, 20 Jan 2013 23:30:09 +0100
Subject: tipc: byte-based overload control on socket receive queue

Change overload control to be purely byte-based, using
sk->sk_rmem_alloc as the byte counter, and compare it to a calculated
upper limit for the socket receive queue.

For all connection messages, irrespective of message importance, the
overload limit is set to a constant value (i.e. 67MB). This limit
should normally never be reached because of the lower limit used by
the flow control algorithm, and is there only as a last resort in case
a faulty peer doesn't respect the send window limit.

For datagram messages, message importance is taken into account when
calculating the overload limit. The calculation is based on
sk->sk_rcvbuf, and is hence configurable via the socket option
SO_RCVBUF.

Cc: Neil Horman
Signed-off-by: Ying Xue
Signed-off-by: Jon Maloy
Signed-off-by: Paul Gortmaker
---
 net/tipc/socket.c | 77 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 39 insertions(+), 38 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f6ceecd44749..cbe2f6ecf07a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -43,7 +43,8 @@
 #define SS_LISTENING	-1	/* socket is listening */
 #define SS_READY	-2	/* socket is connectionless */
 
-#define OVERLOAD_LIMIT_BASE	10000
+#define CONN_OVERLOAD_LIMIT	((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
+				SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
 #define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
 
 struct tipc_sock {
@@ -202,7 +203,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
 
 	sock_init_data(sock, sk);
 	sk->sk_backlog_rcv = backlog_rcv;
-	sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2;
 	sk->sk_data_ready = tipc_data_ready;
 	sk->sk_write_space = tipc_write_space;
 	tipc_sk(sk)->p = tp_ptr;
@@ -1141,34 +1141,6 @@ static void tipc_data_ready(struct sock *sk, int len)
 	rcu_read_unlock();
 }
 
-/**
- * rx_queue_full - determine if receive queue can accept another message
- * @msg: message to be added to queue
- * @queue_size: current size of queue
- * @base: nominal maximum size of queue
- *
- * Returns 1 if queue is unable to accept message, 0 otherwise
- */
-static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
-{
-	u32 threshold;
-	u32 imp = msg_importance(msg);
-
-	if (imp == TIPC_LOW_IMPORTANCE)
-		threshold = base;
-	else if (imp == TIPC_MEDIUM_IMPORTANCE)
-		threshold = base * 2;
-	else if (imp == TIPC_HIGH_IMPORTANCE)
-		threshold = base * 100;
-	else
-		return 0;
-
-	if (msg_connected(msg))
-		threshold *= 4;
-
-	return queue_size >= threshold;
-}
-
 /**
  * filter_connect - Handle all incoming messages for a connection-based socket
  * @tsock: TIPC socket
@@ -1246,6 +1218,36 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
 	return retval;
 }
 
+/**
+ * rcvbuf_limit - get proper overload limit of socket receive queue
+ * @sk: socket
+ * @buf: message
+ *
+ * For all connection oriented messages, irrespective of importance,
+ * the default overload value (i.e. 67MB) is set as limit.
+ *
+ * For all connectionless messages, by default new queue limits are
+ * as belows:
+ *
+ * TIPC_LOW_IMPORTANCE       (5MB)
+ * TIPC_MEDIUM_IMPORTANCE    (10MB)
+ * TIPC_HIGH_IMPORTANCE      (20MB)
+ * TIPC_CRITICAL_IMPORTANCE  (40MB)
+ *
+ * Returns overload limit according to corresponding message importance
+ */
+static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	unsigned int limit;
+
+	if (msg_connected(msg))
+		limit = CONN_OVERLOAD_LIMIT;
+	else
+		limit = sk->sk_rcvbuf << (msg_importance(msg) + 5);
+	return limit;
+}
+
 /**
  * filter_rcv - validate incoming message
  * @sk: socket
@@ -1262,7 +1264,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 {
 	struct socket *sock = sk->sk_socket;
 	struct tipc_msg *msg = buf_msg(buf);
-	u32 recv_q_len;
+	unsigned int limit = rcvbuf_limit(sk, buf);
 	u32 res = TIPC_OK;
 
 	/* Reject message if it is wrong sort of message for socket */
@@ -1279,15 +1281,13 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 	}
 
 	/* Reject message if there isn't room to queue it */
-	recv_q_len = skb_queue_len(&sk->sk_receive_queue);
-	if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) {
-		if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2))
-			return TIPC_ERR_OVERLOAD;
-	}
+	if (sk_rmem_alloc_get(sk) + buf->truesize >= limit)
+		return TIPC_ERR_OVERLOAD;
 
-	/* Enqueue message (finally!) */
+	/* Enqueue message */
 	TIPC_SKB_CB(buf)->handle = 0;
 	__skb_queue_tail(&sk->sk_receive_queue, buf);
+	skb_set_owner_r(buf, sk);
 
 	sk->sk_data_ready(sk, 0);
 	return TIPC_OK;
@@ -1336,7 +1336,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
 	if (!sock_owned_by_user(sk)) {
 		res = filter_rcv(sk, buf);
 	} else {
-		if (sk_add_backlog(sk, buf, sk->sk_rcvbuf))
+		if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf)))
 			res = TIPC_ERR_OVERLOAD;
 		else
 			res = TIPC_OK;
@@ -1570,6 +1570,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
 	} else {
 		__skb_dequeue(&sk->sk_receive_queue);
 		__skb_queue_head(&new_sk->sk_receive_queue, buf);
+		skb_set_owner_r(buf, new_sk);
 	}
 	release_sock(new_sk);
--
cgit v1.2.3
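
The numbers in the new rcvbuf_limit() comment can be checked with a
little arithmetic. CONN_OVERLOAD_LIMIT expands to (512 * 2 + 1) *
SKB_TRUESIZE(66000), i.e. 1025 maximum-size messages including skb
overhead, which lands in the neighborhood of the 67MB quoted above.
The datagram ladder comes from sk->sk_rcvbuf << (importance + 5): with
an assumed default sk_rcvbuf of 160KB that is exactly 5/10/20/40MB for
the four importance levels. A hedged userspace sketch of the same
calculation (the SKB_TRUESIZE overhead and the default rcvbuf value
are approximations, not values taken from this tree):

	#include <stdio.h>

	/* Constants as defined in the TIPC headers of this tree. */
	#define TIPC_FLOW_CONTROL_WIN   512
	#define TIPC_MAX_USER_MSG_SIZE  66000

	/* Stand-in for the kernel's SKB_TRUESIZE(); the real macro adds
	 * struct sk_buff plus skb_shared_info overhead (a few hundred bytes).
	 */
	#define SKB_TRUESIZE(sz)        ((sz) + 512)

	int main(void)
	{
		unsigned long conn = (TIPC_FLOW_CONTROL_WIN * 2 + 1) *
				     (unsigned long)SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE);
		unsigned long rcvbuf = 160 * 1024;	/* assumed default sk_rcvbuf */

		printf("connection limit: ~%lu MB\n", conn / 1000000);	/* ~68, the "67MB" */
		for (int imp = 0; imp <= 3; imp++)	/* LOW .. CRITICAL importance */
			printf("datagram imp %d: %lu MB\n",
			       imp, (rcvbuf << (imp + 5)) >> 20);	/* 5, 10, 20, 40 */
		return 0;
	}
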
From 97f8b87e9108485a0b7070645662253561304458 Mon Sep 17 00:00:00 2001
From: Ying Xue
Date: Thu, 31 Jan 2013 21:51:47 +0100
Subject: tipc: remove redundant checking for the number of iovecs in a send request

As the number of iovecs in a send request is already limited within
UIO_MAXIOV (i.e. 1024) in __sys_sendmsg(), it's unnecessary to check
it again in the TIPC stack.

Signed-off-by: Ying Xue
Signed-off-by: Jon Maloy
Signed-off-by: Paul Gortmaker
---
 net/tipc/socket.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index cbe2f6ecf07a..a9622b6cd916 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -503,8 +503,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 	if (unlikely((m->msg_namelen < sizeof(*dest)) ||
 		     (dest->family != AF_TIPC)))
 		return -EINVAL;
-	if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
-	    (m->msg_iovlen > (unsigned int)INT_MAX))
+	if (total_len > TIPC_MAX_USER_MSG_SIZE)
 		return -EMSGSIZE;
 
 	if (iocb)
@@ -612,8 +611,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
 	if (unlikely(dest))
 		return send_msg(iocb, sock, m, total_len);
 
-	if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
-	    (m->msg_iovlen > (unsigned int)INT_MAX))
+	if (total_len > TIPC_MAX_USER_MSG_SIZE)
 		return -EMSGSIZE;
 
 	if (iocb)
@@ -698,8 +696,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
 		goto exit;
 	}
 
-	if ((total_len > (unsigned int)INT_MAX) ||
-	    (m->msg_iovlen > (unsigned int)INT_MAX)) {
+	if (total_len > (unsigned int)INT_MAX) {
 		res = -EMSGSIZE;
 		goto exit;
 	}
--
cgit v1.2.3
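
For reference, the generic check lives in the socket layer:
__sys_sendmsg() rejects any sendmsg() call whose msg_iovlen exceeds
UIO_MAXIOV (exposed to userspace as IOV_MAX, 1024 on Linux) with
EMSGSIZE before any protocol code runs, so the per-protocol check
above could never fire. A small userspace sketch demonstrating this
under that assumption; the socket family is arbitrary, since the count
check happens before the message is even looked at:

	#include <stdio.h>
	#include <errno.h>
	#include <string.h>
	#include <limits.h>
	#include <sys/socket.h>
	#include <sys/uio.h>

	int main(void)
	{
		int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
		struct iovec iov[1];	/* contents irrelevant: the count check fires first */
		struct msghdr mh;

		memset(&mh, 0, sizeof(mh));
		mh.msg_iov = iov;
		mh.msg_iovlen = IOV_MAX + 1;	/* IOV_MAX == UIO_MAXIOV == 1024 */

		if (sendmsg(fd, &mh, 0) < 0)
			printf("sendmsg: %s\n", strerror(errno));	/* EMSGSIZE */
		return 0;
	}
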
From b67bfe0d42cac56c512dd5da4b1b347a23f4b70a Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Wed, 27 Feb 2013 17:06:00 -0800
Subject: hlist: drop the node parameter from iterators

I'm not sure why, but the hlist for each entry iterators were conceived
differently from the list ones. While the list ones are nice and
elegant:

	list_for_each_entry(pos, head, member)

The hlist ones were greedy and wanted an extra parameter:

	hlist_for_each_entry(tpos, pos, head, member)

Why did they need an extra pos parameter? I'm not quite sure. Not only
do they not really need it, it also prevents the iterator from looking
exactly like the list iterator, which is unfortunate.

Besides the semantic patch, there was some manual work required:

 - Fix up the actual hlist iterators in linux/list.h
 - Fix up the declaration of other iterators based on the hlist ones.
 - A very small amount of places were using the 'node' parameter, this
   was modified to use 'obj->member' instead.
 - Coccinelle didn't handle the hlist_for_each_entry_safe iterator
   properly, so those had to be fixed up manually.

The semantic patch which is mostly the work of Peter Senna Tschudin is
here:

@@
iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host;
type T;
expression a,c,d,e;
identifier b;
statement S;
@@

-T b;
    <+... when != b
(
hlist_for_each_entry(a,
- b,
c, d) S
|
hlist_for_each_entry_continue(a,
- b,
c) S
|
hlist_for_each_entry_from(a,
- b,
c) S
|
hlist_for_each_entry_rcu(a,
- b,
c, d) S
|
hlist_for_each_entry_rcu_bh(a,
- b,
c, d) S
|
hlist_for_each_entry_continue_rcu_bh(a,
- b,
c) S
|
for_each_busy_worker(a, c,
- b,
d) S
|
ax25_uid_for_each(a,
- b,
c) S
|
ax25_for_each(a,
- b,
c) S
|
inet_bind_bucket_for_each(a,
- b,
c) S
|
sctp_for_each_hentry(a,
- b,
c) S
|
sk_for_each(a,
- b,
c) S
|
sk_for_each_rcu(a,
- b,
c) S
|
sk_for_each_from
-(a, b)
+(a)
S
+ sk_for_each_from(a) S
|
sk_for_each_safe(a,
- b,
c, d) S
|
sk_for_each_bound(a,
- b,
c) S
|
hlist_for_each_entry_safe(a,
- b,
c, d, e) S
|
hlist_for_each_entry_continue_rcu(a,
- b,
c) S
|
nr_neigh_for_each(a,
- b,
c) S
|
nr_neigh_for_each_safe(a,
- b,
c, d) S
|
nr_node_for_each(a,
- b,
c) S
|
nr_node_for_each_safe(a,
- b,
c, d) S
|
- for_each_gfn_sp(a, c, d, b) S
+ for_each_gfn_sp(a, c, d) S
|
- for_each_gfn_indirect_valid_sp(a, c, d, b) S
+ for_each_gfn_indirect_valid_sp(a, c, d) S
|
for_each_host(a,
- b,
c) S
|
for_each_host_safe(a,
- b,
c, d) S
|
for_each_mesh_entry(a,
- b,
c, d) S
)
    ...+>

[akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c]
[akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c]
[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: fix warnings]
[akpm@linux-foudnation.org: redo intrusive kvm changes]
Tested-by: Peter Senna Tschudin
Acked-by: Paul E. McKenney
Signed-off-by: Sasha Levin
Cc: Wu Fengguang
Cc: Marcelo Tosatti
Cc: Gleb Natapov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 net/tipc/name_table.c | 8 +++-----
 net/tipc/node.c       | 3 +--
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 46754779fd3d..24b167914311 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -473,11 +473,10 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 static struct name_seq *nametbl_find_seq(u32 type)
 {
 	struct hlist_head *seq_head;
-	struct hlist_node *seq_node;
 	struct name_seq *ns;
 
 	seq_head = &table.types[hash(type)];
-	hlist_for_each_entry(ns, seq_node, seq_head, ns_list) {
+	hlist_for_each_entry(ns, seq_head, ns_list) {
 		if (ns->type == type)
 			return ns;
 	}
@@ -853,7 +852,6 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 			u32 type, u32 lowbound, u32 upbound)
 {
 	struct hlist_head *seq_head;
-	struct hlist_node *seq_node;
 	struct name_seq *seq;
 	int all_types;
 	int ret = 0;
@@ -873,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 			upbound = ~0;
 		for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
 			seq_head = &table.types[i];
-			hlist_for_each_entry(seq, seq_node, seq_head, ns_list) {
+			hlist_for_each_entry(seq, seq_head, ns_list) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						    depth, seq->type,
 						    lowbound, upbound, i);
@@ -889,7 +887,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 		ret += nametbl_header(buf + ret, len - ret, depth);
 		i = hash(type);
 		seq_head = &table.types[i];
-		hlist_for_each_entry(seq, seq_node, seq_head, ns_list) {
+		hlist_for_each_entry(seq, seq_head, ns_list) {
 			if (seq->type == type) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						    depth, type,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 48f39dd3eae8..6e6c434872e8 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -69,12 +69,11 @@ static unsigned int tipc_hashfn(u32 addr)
 struct tipc_node *tipc_node_find(u32 addr)
 {
 	struct tipc_node *node;
-	struct hlist_node *pos;
 
 	if (unlikely(!in_own_cluster_exact(addr)))
 		return NULL;
-	hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) {
+	hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) {
 		if (node->addr == addr)
 			return node;
 	}
--
cgit v1.2.3
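
To make the API change concrete: before this commit the hlist iterator
needed a scratch struct hlist_node * cursor alongside the typed entry
pointer; afterwards it walks entries directly, matching
list_for_each_entry(). A minimal sketch with an illustrative struct,
not taken from the TIPC code:

	#include <linux/list.h>
	#include <linux/types.h>

	struct demo_entry {
		int key;
		struct hlist_node link;
	};

	static struct demo_entry *demo_find(struct hlist_head *head, int key)
	{
		struct demo_entry *e;

		/* Old form (before this commit) required a cursor:
		 *
		 *	struct hlist_node *pos;
		 *	hlist_for_each_entry(e, pos, head, link) { ... }
		 */
		hlist_for_each_entry(e, head, link) {	/* new form: entry, head, member */
			if (e->key == key)
				return e;
		}
		return NULL;
	}
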