From cf3aa02cb4a0c5af5557dd47f15a08a7df33182a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 11:09:06 -0400 Subject: svcrpc: fix handling of too-short rpc's If we detect that an rpc is too short, we abort and close the connection. Except, there's a bug here: we're leaving sk_datalen nonzero without leaving any pages in the sk_pages array. The most likely result of the inconsistency is a subsequent crash in svc_tcp_clear_pages. Also demote the BUG_ON in svc_tcp_clear_pages to a WARN. Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0f679df7d072..df74919c81c0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { - BUG_ON(svsk->sk_pages[i] == NULL); + if (svsk->sk_pages[i] == NULL) { + WARN_ON_ONCE(1); + continue; + } put_page(svsk->sk_pages[i]); svsk->sk_pages[i] = NULL; } @@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) + if (svc_sock_reclen(svsk) < 8) { + svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ + } rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; -- cgit v1.2.3 From 1f691b07c5dc51b2055834f58c0f351defd97f27 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 10:55:40 -0400 Subject: svcrpc: don't error out on small tcp fragment Though clients we care about mostly don't do this, it is possible for rpc requests to be sent in multiple fragments. Here we have a sanity check to ensure that the final received rpc isn't too small--except that the number we're actually checking is the length of just the final fragment, not of the whole rpc. So a perfectly legal rpc that's unluckily fragmented could cause the server to close the connection here. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index df74919c81c0..305374d4fb98 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1095,7 +1095,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) { + if (svsk->sk_datalen < 8) { svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ } -- cgit v1.2.3 From 447383d2ba6061bb069da45f95f223a01bba61dd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Jul 2013 11:30:23 +1000 Subject: NFSD/sunrpc: avoid deadlock on TCP connection due to memory pressure. Since we enabled auto-tuning for sunrpc TCP connections we do not guarantee that there is enough write-space on each connection to queue a reply. If memory pressure causes the window to shrink too small, the request throttling in sunrpc/svc will not accept any requests so no more requests will be handled. Even when pressure decreases the window will not grow again until data is sent on the connection. This means we get a deadlock: no requests will be handled until there is more space, and no space will be allocated until a request is handled. This can be simulated by modifying svc_tcp_has_wspace to inflate the number of byte required and removing the 'svc_sock_setbufsize' calls in svc_setup_socket. I found that multiplying by 16 was enough to make the requirement exceed the default allocation. With this modification in place: mount -o vers=3,proto=tcp 127.0.0.1:/home /mnt would block and eventually time out because the nfs server could not accept any requests. This patch relaxes the request throttling to always allow at least one request through per connection. It does this by checking both sk_stream_min_wspace() and xprt->xpt_reserved are zero. The first is zero when the TCP transmit queue is empty. The second is zero when there are no RPC requests being processed. When both of these are zero the socket is idle and so one more request can safely be allowed through. Applying this patch allows the above mount command to succeed cleanly. Tracing shows that the allocated write buffer space quickly grows and after a few requests are handled, the extra tests are no longer needed to permit further requests to be processed. The main purpose of request throttling is to handle the case when one client is slow at collecting replies and the send queue gets full of replies that the client hasn't acknowledged (at the TCP level) yet. As we only change behaviour when the send queue is empty this main purpose is still preserved. Reported-by: Ben Myers Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 305374d4fb98..7762b9f8a8b7 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1193,7 +1193,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) return 1; required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; - if (sk_stream_wspace(svsk->sk_sk) >= required) + if (sk_stream_wspace(svsk->sk_sk) >= required || + (sk_stream_min_wspace(svsk->sk_sk) == 0 && + atomic_read(&xprt->xpt_reserved) == 0)) return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); return 0; -- cgit v1.2.3