From fdd75ea8df370f206a8163786e7470c1277a5064 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 7 Jul 2015 09:43:45 -0400 Subject: net/tipc: initialize security state for new connection socket Calling connect() with an AF_TIPC socket would trigger a series of error messages from SELinux along the lines of: SELinux: Invalid class 0 type=AVC msg=audit(1434126658.487:34500): avc: denied { } for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass= permissive=0 This was due to a failure to initialize the security state of the new connection sock by the tipc code, leaving it with junk in the security class field and an unlabeled secid. Add a call to security_sk_clone() to inherit the security state from the parent socket. Reported-by: Tim Shearer Signed-off-by: Stephen Smalley Acked-by: Paul Moore Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 46b6ed534ef2..3a7567f690f3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2007,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); -- cgit v1.2.3 From 22d85c79428b8ca9a01623aa3e3a1fe29a30a119 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:23 -0400 Subject: tipc: change sk_buffer handling in tipc_link_xmit() When the function tipc_link_xmit() is given a buffer list for transmission, it currently consumes the list both when transmission is successful and when it fails, except for the special case when it encounters link congestion. This behavior is inconsistent, and needs to be corrected if we want to avoid problems in later commits in this series. In this commit, we change this to let the function consume the list only when transmission is successful, and leave the list with the sender in all other cases. We also modifiy the socket code so that it adapts to this change, i.e., purges the list when a non-congestion error code is returned. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3a7567f690f3..87fef25f6519 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -686,21 +686,22 @@ new_mtu: do { rc = tipc_bclink_xmit(net, pktchain); - if (likely(rc >= 0)) { - rc = dsz; - break; + if (likely(!rc)) + return dsz; + + if (rc == -ELINKCONG) { + tsk->link_cong = 1; + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (!rc) + continue; } + __skb_queue_purge(pktchain); if (rc == -EMSGSIZE) { msg->msg_iter = save; goto new_mtu; } - if (rc != -ELINKCONG) - break; - tipc_sk(sk)->link_cong = 1; - rc = tipc_wait_for_sndmsg(sock, &timeo); - if (rc) - __skb_queue_purge(pktchain); - } while (!rc); + break; + } while (1); return rc; } @@ -925,23 +926,24 @@ new_mtu: skb = skb_peek(pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid); - if (likely(rc >= 0)) { + if (likely(!rc)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; - rc = dsz; - break; + return dsz; } + if (rc == -ELINKCONG) { + tsk->link_cong = 1; + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (!rc) + continue; + } + __skb_queue_purge(pktchain); if (rc == -EMSGSIZE) { m->msg_iter = save; goto new_mtu; } - if (rc != -ELINKCONG) - break; - tsk->link_cong = 1; - rc = tipc_wait_for_sndmsg(sock, &timeo); - if (rc) - __skb_queue_purge(pktchain); - } while (!rc); + break; + } while (1); return rc; } @@ -1048,10 +1050,11 @@ next: tsk->sent_unacked++; sent += send; if (sent == dsz) - break; + return dsz; goto next; } if (rc == -EMSGSIZE) { + __skb_queue_purge(pktchain); tsk->max_pkt = tipc_node_get_mtu(net, dnode, portid); m->msg_iter = save; @@ -1059,13 +1062,13 @@ next: } if (rc != -ELINKCONG) break; + tsk->link_cong = 1; } rc = tipc_wait_for_sndpkt(sock, &timeo); - if (rc) - __skb_queue_purge(pktchain); } while (!rc); + __skb_queue_purge(pktchain); return sent ? sent : rc; } -- cgit v1.2.3 From af9b028e270fda6fb812d70d17d902297df1ceb5 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:24 -0400 Subject: tipc: make media xmit call outside node spinlock context Currently, message sending is performed through a deep call chain, where the node spinlock is grabbed and held during a significant part of the transmission time. This is clearly detrimental to overall throughput performance; it would be better if we could send the message after the spinlock has been released. In this commit, we do instead let the call revert on the stack after the buffer chain has been added to the transmission queue, whereafter clones of the buffers are transmitted to the device layer outside the spinlock scope. As a further step in our effort to separate the roles of the node and link entities we also move the function tipc_link_xmit() to node.c, and rename it to tipc_node_xmit(). Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 87fef25f6519..5b0b08d58fcc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -261,7 +261,7 @@ static void tsk_rej_rx_queue(struct sock *sk) while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -443,7 +443,7 @@ static int tipc_release(struct socket *sock) } if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(net, skb, dnode, 0); + tipc_node_xmit_skb(net, skb, dnode, 0); } } @@ -456,7 +456,7 @@ static int tipc_release(struct socket *sock) tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, TIPC_ERR_NO_PORT); if (skb) - tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); tipc_node_remove_conn(net, dnode, tsk->portid); } @@ -925,7 +925,7 @@ new_mtu: do { skb = skb_peek(pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid); + rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid); if (likely(!rc)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -1045,7 +1045,7 @@ next: return rc; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(net, pktchain, dnode, portid); + rc = tipc_node_xmit(net, pktchain, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1224,7 +1224,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) return; msg = buf_msg(skb); msg_set_msgcnt(msg, ack); - tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg)); + tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) @@ -1703,7 +1703,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err)) - tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); return 0; } @@ -1799,7 +1799,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) continue; xmit: - tipc_link_xmit_skb(net, skb, dnode, dport); + tipc_node_xmit_skb(net, skb, dnode, dport); } return err ? -EHOSTUNREACH : 0; } @@ -2092,7 +2092,7 @@ restart: } if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit_skb(net, skb, dnode, + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); @@ -2102,7 +2102,7 @@ restart: 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, TIPC_CONN_SHUTDOWN); - tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; @@ -2164,7 +2164,7 @@ static void tipc_sk_timeout(unsigned long data) } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); + tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); exit: sock_put(sk); } -- cgit v1.2.3 From 29042e19f2c602fabe4705b5b719550b4627639c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 22 Jul 2015 10:11:18 -0400 Subject: tipc: let function tipc_msg_reverse() expand header when needed The shortest TIPC message header, for cluster local CONNECTED messages, is 24 bytes long. With this format, the fields "dest_node" and "orig_node" are optimized away, since they in reality are redundant in this particular case. However, the absence of these fields leads to code inconsistencies that are difficult to handle in some cases, especially when we need to reverse or reject messages at the socket layer. In this commit, we concentrate the handling of the absent fields to one place, by letting the function tipc_msg_reverse() reallocate the buffer and expand the header to 32 bytes when necessary. This means that the socket code now can assume that the two previously absent fields are present in the header when a message needs to be rejected. This opens up for some further simplifications of the socket code. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5b0b08d58fcc..e2d5b9831485 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -260,7 +260,7 @@ static void tsk_rej_rx_queue(struct sock *sk) u32 own_node = tsk_own_node(tipc_sk(sk)); while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT)) + if (tipc_msg_reverse(own_node, &skb, &dnode, TIPC_ERR_NO_PORT)) tipc_node_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -441,7 +441,7 @@ static int tipc_release(struct socket *sock) tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + if (tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, TIPC_ERR_NO_PORT)) tipc_node_xmit_skb(net, skb, dnode, 0); } @@ -784,7 +784,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb) if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (tipc_msg_reverse(own_node, *skb, &dnode, TIPC_OK)) { + if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_OK)) { msg_set_type(msg, CONN_PROBE_REPLY); return; } @@ -1702,7 +1702,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) atomic_add(truesize, dcnt); return 0; } - if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err)) + if (!err || tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, -err)) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); return 0; } @@ -1796,7 +1796,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) goto xmit; } tn = net_generic(net, tipc_net_id); - if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + if (!tipc_msg_reverse(tn->own_addr, &skb, &dnode, -err)) continue; xmit: tipc_node_xmit_skb(net, skb, dnode, dport); @@ -2090,7 +2090,7 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + if (tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, TIPC_CONN_SHUTDOWN)) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); -- cgit v1.2.3 From bcd3ffd4f6d7c994c93be2ab8598fdfb2952a1f1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 22 Jul 2015 10:11:19 -0400 Subject: tipc: introduce new tipc_sk_respond() function Currently, we use the code sequence if (msg_reverse()) tipc_link_xmit_skb() at numerous locations in socket.c. The preparation of arguments for these calls, as well as the sequence itself, makes the code unecessarily complex. In this commit, we introduce a new function, tipc_sk_respond(), that performs this call combination. We also replace some, but not yet all, of these explicit call sequences with calls to the new function. Notably, we let the function tipc_sk_proto_rcv() use the new function to directly send out PROBE_REPLY messages, instead of deferring this to the calling tipc_sk_rcv() function, as we do now. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 81 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 36 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e2d5b9831485..71d88adadb18 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -248,6 +248,22 @@ static void tsk_advance_rx_queue(struct sock *sk) kfree_skb(__skb_dequeue(&sk->sk_receive_queue)); } +/* tipc_sk_respond() : send response message back to sender + */ +static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err) +{ + u32 selector; + u32 dnode; + u32 onode = tipc_own_addr(sock_net(sk)); + + if (!tipc_msg_reverse(onode, &skb, err)) + return; + + dnode = msg_destnode(buf_msg(skb)); + selector = msg_origport(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); +} + /** * tsk_rej_rx_queue - reject all buffers in socket receive queue * @@ -256,13 +272,9 @@ static void tsk_advance_rx_queue(struct sock *sk) static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *skb; - u32 dnode; - u32 own_node = tsk_own_node(tipc_sk(sk)); - while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(own_node, &skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_node_xmit_skb(sock_net(sk), skb, dnode, 0); - } + while ((skb = __skb_dequeue(&sk->sk_receive_queue))) + tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); } /* tsk_peer_msg - verify if message was sent by connected port's peer @@ -441,9 +453,7 @@ static int tipc_release(struct socket *sock) tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, - TIPC_ERR_NO_PORT)) - tipc_node_xmit_skb(net, skb, dnode, 0); + tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); } } @@ -764,35 +774,35 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, /** * tipc_sk_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket - * @skb: pointer to message buffer. Set to NULL if buffer is consumed. + * @skb: pointer to message buffer. */ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(*skb); + struct sock *sk = &tsk->sk; + struct tipc_msg *hdr = buf_msg(skb); + int mtyp = msg_type(hdr); int conn_cong; - u32 dnode; - u32 own_node = tsk_own_node(tsk); + /* Ignore if connection cannot be validated: */ - if (!tsk_peer_msg(tsk, msg)) + if (!tsk_peer_msg(tsk, hdr)) goto exit; tsk->probing_state = TIPC_CONN_OK; - if (msg_type(msg) == CONN_ACK) { + if (mtyp == CONN_PROBE) { + msg_set_type(hdr, CONN_PROBE_REPLY); + tipc_sk_respond(sk, skb, TIPC_OK); + return; + } else if (mtyp == CONN_ACK) { conn_cong = tsk_conn_cong(tsk); - tsk->sent_unacked -= msg_msgcnt(msg); + tsk->sent_unacked -= msg_msgcnt(hdr); if (conn_cong) - tsk->sk.sk_write_space(&tsk->sk); - } else if (msg_type(msg) == CONN_PROBE) { - if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_OK)) { - msg_set_type(msg, CONN_PROBE_REPLY); - return; - } + sk->sk_write_space(sk); + } else if (mtyp != CONN_PROBE_REPLY) { + pr_warn("Received unknown CONN_PROTO msg\n"); } - /* Do nothing if msg_type() == CONN_PROBE_REPLY */ exit: - kfree_skb(*skb); - *skb = NULL; + kfree_skb(skb); } static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) @@ -1638,7 +1648,7 @@ static int filter_rcv(struct sock *sk, struct sk_buff **skb) int rc = TIPC_OK; if (unlikely(msg_user(msg) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb); + tipc_sk_proto_rcv(tsk, *skb); return TIPC_OK; } @@ -1690,7 +1700,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int err; atomic_t *dcnt; - u32 dnode; + u32 dnode = msg_prevnode(buf_msg(skb)); struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); uint truesize = skb->truesize; @@ -1702,7 +1712,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) atomic_add(truesize, dcnt); return 0; } - if (!err || tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, -err)) + if (!err || tipc_msg_reverse(tsk_own_node(tsk), &skb, -err)) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); return 0; } @@ -1794,9 +1804,11 @@ int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) if (!err) { dnode = msg_destnode(buf_msg(skb)); goto xmit; + } else { + dnode = msg_prevnode(buf_msg(skb)); } tn = net_generic(net, tipc_net_id); - if (!tipc_msg_reverse(tn->own_addr, &skb, &dnode, -err)) + if (!tipc_msg_reverse(tn->own_addr, &skb, -err)) continue; xmit: tipc_node_xmit_skb(net, skb, dnode, dport); @@ -2083,6 +2095,8 @@ static int tipc_shutdown(struct socket *sock, int how) case SS_CONNECTED: restart: + dnode = tsk_peer_node(tsk); + /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ skb = __skb_dequeue(&sk->sk_receive_queue); if (skb) { @@ -2090,13 +2104,8 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, - TIPC_CONN_SHUTDOWN)) - tipc_node_xmit_skb(net, skb, dnode, - tsk->portid); + tipc_sk_respond(sk, skb, TIPC_CONN_SHUTDOWN); } else { - dnode = tsk_peer_node(tsk); - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tsk_own_node(tsk), -- cgit v1.2.3 From cda3696d3d26eb798c94de0dab5bd66ddb5627cb Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 22 Jul 2015 10:11:20 -0400 Subject: tipc: clean up socket layer message reception When a message is received in a socket, one of the call chains tipc_sk_rcv()->tipc_sk_enqueue()->filter_rcv()(->tipc_sk_proto_rcv()) or tipc_sk_backlog_rcv()->filter_rcv()(->tipc_sk_proto_rcv()) are followed. At each of these levels we may encounter situations where the message may need to be rejected, or a new message produced for transfer back to the sender. Despite recent improvements, the current code for doing this is perceived as awkward and hard to follow. Leveraging the two previous commits in this series, we now introduce a more uniform handling of such situations. We let each of the functions in the chain itself produce/reverse the message to be returned to the sender, but also perform the actual forwarding. This simplifies the necessary logics within each function. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 255 ++++++++++++++++++++++++++---------------------------- 1 file changed, 123 insertions(+), 132 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 71d88adadb18..1060d52ff23e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1520,82 +1520,81 @@ static void tipc_data_ready(struct sock *sk) * @tsk: TIPC socket * @skb: pointer to message buffer. Set to NULL if buffer is consumed * - * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise + * Returns true if everything ok, false otherwise */ -static int filter_connect(struct tipc_sock *tsk, struct sk_buff **skb) +static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct socket *sock = sk->sk_socket; - struct tipc_msg *msg = buf_msg(*skb); - int retval = -TIPC_ERR_NO_PORT; + struct tipc_msg *hdr = buf_msg(skb); - if (msg_mcast(msg)) - return retval; + if (unlikely(msg_mcast(hdr))) + return false; switch ((int)sock->state) { case SS_CONNECTED: + /* Accept only connection-based messages sent by peer */ - if (tsk_peer_msg(tsk, msg)) { - if (unlikely(msg_errcode(msg))) { - sock->state = SS_DISCONNECTING; - tsk->connected = 0; - /* let timer expire on it's own */ - tipc_node_remove_conn(net, tsk_peer_node(tsk), - tsk->portid); - } - retval = TIPC_OK; + if (unlikely(!tsk_peer_msg(tsk, hdr))) + return false; + + if (unlikely(msg_errcode(hdr))) { + sock->state = SS_DISCONNECTING; + tsk->connected = 0; + /* Let timer expire on it's own */ + tipc_node_remove_conn(net, tsk_peer_node(tsk), + tsk->portid); } - break; + return true; + case SS_CONNECTING: - /* Accept only ACK or NACK message */ - if (unlikely(!msg_connected(msg))) - break; + /* Accept only ACK or NACK message */ + if (unlikely(!msg_connected(hdr))) + return false; - if (unlikely(msg_errcode(msg))) { + if (unlikely(msg_errcode(hdr))) { sock->state = SS_DISCONNECTING; sk->sk_err = ECONNREFUSED; - retval = TIPC_OK; - break; + return true; } - if (unlikely(msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)) { + if (unlikely(!msg_isdata(hdr))) { sock->state = SS_DISCONNECTING; sk->sk_err = EINVAL; - retval = TIPC_OK; - break; + return true; } - tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg)); - msg_set_importance(&tsk->phdr, msg_importance(msg)); + tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr)); + msg_set_importance(&tsk->phdr, msg_importance(hdr)); sock->state = SS_CONNECTED; - /* If an incoming message is an 'ACK-', it should be - * discarded here because it doesn't contain useful - * data. In addition, we should try to wake up - * connect() routine if sleeping. - */ - if (msg_data_sz(msg) == 0) { - kfree_skb(*skb); - *skb = NULL; - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); - } - retval = TIPC_OK; - break; + /* If 'ACK+' message, add to socket receive queue */ + if (msg_data_sz(hdr)) + return true; + + /* If empty 'ACK-' message, wake up sleeping connect() */ + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); + + /* 'ACK-' message is neither accepted nor rejected: */ + msg_set_dest_droppable(hdr, 1); + return false; + case SS_LISTENING: case SS_UNCONNECTED: + /* Accept only SYN message */ - if (!msg_connected(msg) && !(msg_errcode(msg))) - retval = TIPC_OK; + if (!msg_connected(hdr) && !(msg_errcode(hdr))) + return true; break; case SS_DISCONNECTING: break; default: pr_err("Unknown socket state %u\n", sock->state); } - return retval; + return false; } /** @@ -1630,61 +1629,70 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) /** * filter_rcv - validate incoming message * @sk: socket - * @skb: pointer to message. Set to NULL if buffer is consumed. + * @skb: pointer to message. * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. * * Called with socket lock already taken * - * Returns 0 (TIPC_OK) if message was ok, -TIPC error code if rejected + * Returns true if message was added to socket receive queue, otherwise false */ -static int filter_rcv(struct sock *sk, struct sk_buff **skb) +static bool filter_rcv(struct sock *sk, struct sk_buff *skb) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_msg *msg = buf_msg(*skb); - unsigned int limit = rcvbuf_limit(sk, *skb); - int rc = TIPC_OK; + struct tipc_msg *hdr = buf_msg(skb); + unsigned int limit = rcvbuf_limit(sk, skb); + int err = TIPC_OK; + int usr = msg_user(hdr); - if (unlikely(msg_user(msg) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, *skb); - return TIPC_OK; + if (unlikely(msg_user(hdr) == CONN_MANAGER)) { + tipc_sk_proto_rcv(tsk, skb); + return false; } - if (unlikely(msg_user(msg) == SOCK_WAKEUP)) { - kfree_skb(*skb); + if (unlikely(usr == SOCK_WAKEUP)) { + kfree_skb(skb); tsk->link_cong = 0; sk->sk_write_space(sk); - *skb = NULL; - return TIPC_OK; + return false; } - /* Reject message if it is wrong sort of message for socket */ - if (msg_type(msg) > TIPC_DIRECT_MSG) - return -TIPC_ERR_NO_PORT; + /* Drop if illegal message type */ + if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) { + kfree_skb(skb); + return false; + } - if (sock->state == SS_READY) { - if (msg_connected(msg)) - return -TIPC_ERR_NO_PORT; - } else { - rc = filter_connect(tsk, skb); - if (rc != TIPC_OK || !*skb) - return rc; + /* Reject if wrong message type for current socket state */ + if (unlikely(sock->state == SS_READY)) { + if (msg_connected(hdr)) { + err = TIPC_ERR_NO_PORT; + goto reject; + } + } else if (unlikely(!filter_connect(tsk, skb))) { + err = TIPC_ERR_NO_PORT; + goto reject; } /* Reject message if there isn't room to queue it */ - if (sk_rmem_alloc_get(sk) + (*skb)->truesize >= limit) - return -TIPC_ERR_OVERLOAD; + if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) { + err = TIPC_ERR_OVERLOAD; + goto reject; + } /* Enqueue message */ - TIPC_SKB_CB(*skb)->handle = NULL; - __skb_queue_tail(&sk->sk_receive_queue, *skb); - skb_set_owner_r(*skb, sk); + TIPC_SKB_CB(skb)->handle = NULL; + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); sk->sk_data_ready(sk); - *skb = NULL; - return TIPC_OK; + return true; + +reject: + tipc_sk_respond(sk, skb, err); + return false; } /** @@ -1698,22 +1706,10 @@ static int filter_rcv(struct sock *sk, struct sk_buff **skb) */ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - int err; - atomic_t *dcnt; - u32 dnode = msg_prevnode(buf_msg(skb)); - struct tipc_sock *tsk = tipc_sk(sk); - struct net *net = sock_net(sk); - uint truesize = skb->truesize; + unsigned int truesize = skb->truesize; - err = filter_rcv(sk, &skb); - if (likely(!skb)) { - dcnt = &tsk->dupl_rcvcnt; - if (atomic_read(dcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(truesize, dcnt); - return 0; - } - if (!err || tipc_msg_reverse(tsk_own_node(tsk), &skb, -err)) - tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + if (likely(filter_rcv(sk, skb))) + atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); return 0; } @@ -1723,45 +1719,43 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * @inputq: list of incoming buffers with potentially different destinations * @sk: socket where the buffers should be enqueued * @dport: port number for the socket - * @_skb: returned buffer to be forwarded or rejected, if applicable * * Caller must hold socket lock - * - * Returns TIPC_OK if all buffers enqueued, otherwise -TIPC_ERR_OVERLOAD - * or -TIPC_ERR_NO_PORT */ -static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, - u32 dport, struct sk_buff **_skb) +static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, + u32 dport) { unsigned int lim; atomic_t *dcnt; - int err; struct sk_buff *skb; unsigned long time_limit = jiffies + 2; while (skb_queue_len(inputq)) { if (unlikely(time_after_eq(jiffies, time_limit))) - return TIPC_OK; + return; + skb = tipc_skb_dequeue(inputq, dport); if (unlikely(!skb)) - return TIPC_OK; + return; + + /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - err = filter_rcv(sk, &skb); - if (likely(!skb)) - continue; - *_skb = skb; - return err; + filter_rcv(sk, skb); + continue; } + + /* Try backlog, compensating for double-counted bytes */ dcnt = &tipc_sk(sk)->dupl_rcvcnt; if (sk->sk_backlog.len) atomic_set(dcnt, 0); lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); if (likely(!sk_add_backlog(sk, skb, lim))) continue; - *_skb = skb; - return -TIPC_ERR_OVERLOAD; + + /* Overload => reject message back to sender */ + tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD); + break; } - return TIPC_OK; } /** @@ -1769,51 +1763,46 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, * @inputq: buffer list containing the buffers * Consumes all buffers in list until inputq is empty * Note: may be called in multiple threads referring to the same queue - * Returns 0 if last buffer was accepted, otherwise -EHOSTUNREACH - * Only node local calls check the return value, sending single-buffer queues */ -int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) +void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { u32 dnode, dport = 0; int err; - struct sk_buff *skb; struct tipc_sock *tsk; - struct tipc_net *tn; struct sock *sk; + struct sk_buff *skb; while (skb_queue_len(inputq)) { - err = -TIPC_ERR_NO_PORT; - skb = NULL; dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); + if (likely(tsk)) { sk = &tsk->sk; if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { - err = tipc_sk_enqueue(inputq, sk, dport, &skb); + tipc_sk_enqueue(inputq, sk, dport); spin_unlock_bh(&sk->sk_lock.slock); - dport = 0; } sock_put(sk); - } else { - skb = tipc_skb_dequeue(inputq, dport); - } - if (likely(!skb)) continue; - if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) - goto xmit; - if (!err) { - dnode = msg_destnode(buf_msg(skb)); - goto xmit; - } else { - dnode = msg_prevnode(buf_msg(skb)); } - tn = net_generic(net, tipc_net_id); - if (!tipc_msg_reverse(tn->own_addr, &skb, -err)) + + /* No destination socket => dequeue skb if still there */ + skb = tipc_skb_dequeue(inputq, dport); + if (!skb) + return; + + /* Try secondary lookup if unresolved named message */ + err = TIPC_ERR_NO_PORT; + if (tipc_msg_lookup_dest(net, skb, &err)) + goto xmit; + + /* Prepare for message rejection */ + if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err)) continue; xmit: + dnode = msg_destnode(buf_msg(skb)); tipc_node_xmit_skb(net, skb, dnode, dport); } - return err ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) @@ -2082,7 +2071,10 @@ static int tipc_shutdown(struct socket *sock, int how) struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; - u32 dnode; + u32 dnode = tsk_peer_node(tsk); + u32 dport = tsk_peer_port(tsk); + u32 onode = tipc_own_addr(net); + u32 oport = tsk->portid; int res; if (how != SHUT_RDWR) @@ -2108,9 +2100,8 @@ restart: } else { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, tsk_own_node(tsk), - tsk_peer_port(tsk), - tsk->portid, TIPC_CONN_SHUTDOWN); + 0, dnode, onode, dport, oport, + TIPC_CONN_SHUTDOWN); tipc_node_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; -- cgit v1.2.3