From 67426b756c4d52c511c4b22b269accea171692a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Oct 2010 20:44:44 +0000 Subject: af_unix: use keyed wakeups Instead of waking up all sleepers, use wake_up_interruptible_sync_poll() to wake up only the ones interested in writing to the socket. This patch is a specialization of commit 37e5540b3c9d (epoll keyed wakeups: make sockets use keyed wakeups). On a test program provided by Alban Crequy : Before: real 0m3.101s user 0m0.000s sys 0m6.104s After: real 0m0.211s user 0m0.000s sys 0m0.208s Reported-by: Alban Crequy Signed-off-by: Eric Dumazet Cc: Davide Libenzi Signed-off-by: David S. Miller --- net/unix/af_unix.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3c95304a0817..f33c5958dbb2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -316,7 +316,8 @@ static void unix_write_space(struct sock *sk) if (unix_writable(sk)) { wq = rcu_dereference(sk->sk_wq); if (wq_has_sleeper(wq)) - wake_up_interruptible_sync(&wq->wait); + wake_up_interruptible_sync_poll(&wq->wait, + POLLOUT | POLLWRNORM | POLLWRBAND); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); @@ -1710,7 +1711,8 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_unlock; } - wake_up_interruptible_sync(&u->peer_wait); + wake_up_interruptible_sync_poll(&u->peer_wait, + POLLOUT | POLLWRNORM | POLLWRBAND); if (msg->msg_name) unix_copy_addr(msg, skb->sk); -- cgit v1.2.3 From 5456f09aaf88731e16dbcea7522cb330b6846415 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 31 Oct 2010 05:36:23 +0000 Subject: af_unix: fix unix_dgram_poll() behavior for EPOLLOUT event Alban Crequy reported a problem with connected dgram af_unix sockets and provided a test program. epoll() would fail to send an EPOLLOUT event when a thread dequeues a packet from the other peer, making its receive queue not full. 
This is because unix_dgram_poll() fails to call sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); if the socket is not writeable at the time epoll_ctl(ADD) is called. We must call sock_poll_wait(), regardless of 'writable' status, so that epoll can be notified later of state changes. Misc: avoids testing twice (sk->sk_shutdown & RCV_SHUTDOWN) Reported-by: Alban Crequy Cc: Davide Libenzi Signed-off-by: Eric Dumazet Acked-by: Davide Libenzi Signed-off-by: David S. Miller --- net/unix/af_unix.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f33c5958dbb2..e8898758dd31 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2074,13 +2074,12 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) mask |= POLLERR; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP; + mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue) || - (sk->sk_shutdown & RCV_SHUTDOWN)) + if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -2092,20 +2091,15 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; } - /* writable? 
*/ writable = unix_writable(sk); - if (writable) { - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, - wait); - if (unix_recvq_full(other)) - writable = 0; - } - - sock_put(other); + other = unix_peer_get(sk); + if (other) { + if (unix_peer(other) != sk) { + sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); + if (unix_recvq_full(other)) + writable = 0; } + sock_put(other); } if (writable) -- cgit v1.2.3 From 973a34aa8593dbfe84386343c694f5beecb51d8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 31 Oct 2010 05:38:25 +0000 Subject: af_unix: optimize unix_dgram_poll() unix_dgram_poll() is pretty expensive to check POLLOUT status, because it has to lock the socket to get its peer, take a reference on the peer to check its receive queue status, and queue another poll_wait on peer_wait. This all can be avoided if the process calling unix_dgram_poll() is not interested in POLLOUT status. It makes unix_dgram_recvmsg() faster by not queueing irrelevant pollers in peer_wait. On a test program provided by Alban Crequy : Before: real 0m0.211s user 0m0.000s sys 0m0.208s After: real 0m0.044s user 0m0.000s sys 0m0.040s Suggested-by: Davide Libenzi Reported-by: Alban Crequy Acked-by: Davide Libenzi Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e8898758dd31..7ff31c60186a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2091,6 +2091,10 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; } + /* No write status requested, avoid expensive OUT tests. 
*/ + if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT))) + return mask; + writable = unix_writable(sk); other = unix_peer_get(sk); if (other) { -- cgit v1.2.3 From 3610cda53f247e176bcbb7a7cca64bc53b12acdb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Jan 2011 15:38:53 -0800 Subject: af_unix: Avoid socket->sk NULL OOPS in stream connect security hooks. unix_release() can asynchronously set socket->sk to NULL, and it does so without holding the unix_state_lock() on "other" during stream connects. However, the reverse mapping, sk->sk_socket, is only transitioned to NULL under the unix_state_lock(). Therefore make the security hooks follow the reverse mapping instead of the forward mapping. Reported-by: Jeremy Fitzhardinge Reported-by: Linus Torvalds Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 417d7a6c36cf..dd419d286204 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1157,7 +1157,7 @@ restart: goto restart; } - err = security_unix_stream_connect(sock, other->sk_socket, newsk); + err = security_unix_stream_connect(sk, other, newsk); if (err) { unix_state_unlock(sk); goto out_unlock; -- cgit v1.2.3