From b0ac1bd2bbfd5e500432714e55a791c4d394047f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Dec 2015 13:30:42 -0500 Subject: NFS: Background flush should not be low priority Background flush is needed in order to satisfy the global page limits. Don't subvert by reducing the priority. This should also address a write starvation issue that was reported by Neil Brown. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7b9316406930..7a4fe7d82e65 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -247,8 +247,6 @@ static int wb_priority(struct writeback_control *wbc) return FLUSH_HIGHPRI | FLUSH_STABLE; if (wbc->sync_mode == WB_SYNC_ALL) ret = FLUSH_COND_STABLE; - if (wbc->for_kupdate || wbc->for_background) - ret |= FLUSH_LOWPRI; return ret; } -- cgit v1.2.3 From 494f74a26c14d10bb26a45218b50feb75bdedeca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Dec 2015 13:34:59 -0500 Subject: NFS: Flush reclaim writes using FLUSH_COND_STABLE If there are already writes queued up for commit, then don't flush just this page even if it is a reclaim issue. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7a4fe7d82e65..1ea35f88eadb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -244,7 +244,7 @@ static int wb_priority(struct writeback_control *wbc) { int ret = 0; if (wbc->for_reclaim) - return FLUSH_HIGHPRI | FLUSH_STABLE; + return FLUSH_HIGHPRI | FLUSH_COND_STABLE; if (wbc->sync_mode == WB_SYNC_ALL) ret = FLUSH_COND_STABLE; return ret; -- cgit v1.2.3 From 0bcbf039f6b2bcefe4f5dada76079080edf9ecd0 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 5 Dec 2015 15:57:31 +0800 Subject: nfs: handle request add failure properly When we fail to queue a read page to IO descriptor, we need to clean it up otherwise it is hanging around preventing nfs module from being removed. When we fail to queue a write page to IO descriptor, we need to clean it up and also save the failure status to open context. Then at file close, we can try to write pages back again and drop the page if it fails to writeback in .launder_page, which will be done in the next patch. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7b9316406930..9dafb08ddae5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -545,6 +545,15 @@ try_again: return head; } +static void nfs_write_error_remove_page(struct nfs_page *req) +{ + nfs_unlock_request(req); + nfs_end_page_writeback(req); + nfs_release_request(req); + generic_error_remove_page(page_file_mapping(req->wb_page), + req->wb_page); +} + /* * Find an associated nfs write request, and prepare to flush it out * May return an error if the user signalled nfs_wait_on_request(). @@ -567,8 +576,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, ret = 0; if (!nfs_pageio_add_request(pgio, req)) { - nfs_redirty_request(req); ret = pgio->pg_error; + /* + * Remove the problematic req upon fatal errors, + * while other dirty pages can still be around + * until they get flushed. + */ + if (nfs_error_is_fatal(ret)) { + nfs_context_set_write_error(req->wb_context, ret); + nfs_write_error_remove_page(req); + } else { + nfs_redirty_request(req); + ret = -EAGAIN; + } } else nfs_add_stats(page_file_mapping(page)->host, NFSIOS_WRITEPAGES, 1); -- cgit v1.2.3 From d6c843b96e1cb5199147e3281a724e3c0b69a9ab Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 5 Dec 2015 16:20:43 +0800 Subject: nfs: only remove page from mapping if launder_page fails Instead of dropping pages when write fails, only do it when we get fatal failure in launder_page write back. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9dafb08ddae5..4d254232d728 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -559,7 +559,8 @@ static void nfs_write_error_remove_page(struct nfs_page *req) * May return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page, bool nonblock) + struct page *page, bool nonblock, + bool launder) { struct nfs_page *req; int ret = 0; @@ -578,17 +579,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, if (!nfs_pageio_add_request(pgio, req)) { ret = pgio->pg_error; /* - * Remove the problematic req upon fatal errors, - * while other dirty pages can still be around - * until they get flushed. + * Remove the problematic req upon fatal errors + * in launder case, while other dirty pages can + * still be around until they get flushed. */ if (nfs_error_is_fatal(ret)) { nfs_context_set_write_error(req->wb_context, ret); - nfs_write_error_remove_page(req); - } else { - nfs_redirty_request(req); - ret = -EAGAIN; + if (launder) { + nfs_write_error_remove_page(req); + goto out; + } } + nfs_redirty_request(req); + ret = -EAGAIN; } else nfs_add_stats(page_file_mapping(page)->host, NFSIOS_WRITEPAGES, 1); @@ -596,12 +599,14 @@ out: return ret; } -static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) +static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, + struct nfs_pageio_descriptor *pgio, bool launder) { int ret; nfs_pageio_cond_complete(pgio, page_file_index(page)); - ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); + ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE, + launder); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); ret = 0; @@ -612,7 +617,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st /* * Write an mmapped page to the server. */ -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +static int nfs_writepage_locked(struct page *page, + struct writeback_control *wbc, + bool launder) { struct nfs_pageio_descriptor pgio; struct inode *inode = page_file_mapping(page)->host; @@ -621,7 +628,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, &nfs_async_write_completion_ops); - err = nfs_do_writepage(page, wbc, &pgio); + err = nfs_do_writepage(page, wbc, &pgio, launder); nfs_pageio_complete(&pgio); if (err < 0) return err; @@ -634,7 +641,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) { int ret; - ret = nfs_writepage_locked(page, wbc); + ret = nfs_writepage_locked(page, wbc, false); unlock_page(page); return ret; } @@ -643,7 +650,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * { int ret; - ret = nfs_do_writepage(page, wbc, data); + ret = nfs_do_writepage(page, wbc, data, false); unlock_page(page); return ret; } @@ -1931,7 +1938,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) /* * Write back all requests on one page - we do this before reading it. */ -int nfs_wb_page(struct inode *inode, struct page *page) +int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder) { loff_t range_start = page_file_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); @@ -1948,7 +1955,7 @@ int nfs_wb_page(struct inode *inode, struct page *page) for (;;) { wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { - ret = nfs_writepage_locked(page, &wbc); + ret = nfs_writepage_locked(page, &wbc, launder); if (ret < 0) goto out_error; continue; -- cgit v1.2.3 From dc602dd706cb64036132a7903ead1c67d9a7bcb9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Dec 2015 11:44:06 -0500 Subject: NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs The flexfiles layout in particular, seems to want to poke around in the O_DIRECT flags when retransmitting. This patch sets up an interface to allow it to call back into O_DIRECT to handle retransmission correctly. It also fixes a potential bug whereby we could change the behaviour of O_DIRECT if an error is already pending. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7b9316406930..0aa3e6b3db70 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1326,9 +1326,15 @@ static void nfs_async_write_error(struct list_head *head) } } +static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr) +{ + nfs_async_write_error(&hdr->pages); +} + static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .error_cleanup = nfs_async_write_error, .completion = nfs_write_completion, + .reschedule_io = nfs_async_write_reschedule_io, }; void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, -- cgit v1.2.3 From af7cf057933f01dc7f33ddfb5e436ad598ed17ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Sep 2015 20:34:05 -0400 Subject: NFS: Allow multiple commit requests in flight per file Allow synchronous RPC calls to wait for pending RPC calls to finish, but also allow asynchronous ones to just fire off another commit. With this patch, the xfstests generic/074 test completes in 226s instead of 242s Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 70 +++++++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 37 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0aa3e6b3db70..ae29f082c9c2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include @@ -1535,27 +1537,29 @@ static void nfs_writeback_result(struct rpc_task *task, } } +static int nfs_wait_atomic_killable(atomic_t *key) +{ + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + freezable_schedule_unsafe(); + return 0; +} -static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) +static int wait_on_commit(struct nfs_mds_commit_info *cinfo) { - int ret; + return wait_on_atomic_t(&cinfo->rpcs_out, + nfs_wait_atomic_killable, TASK_KILLABLE); +} - if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) - return 1; - if (!may_wait) - return 0; - ret = out_of_line_wait_on_bit_lock(&nfsi->flags, - NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - return (ret < 0) ? ret : 1; +static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) +{ + atomic_inc(&cinfo->rpcs_out); } -static void nfs_commit_clear_lock(struct nfs_inode *nfsi) +static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) { - clear_bit(NFS_INO_COMMIT, &nfsi->flags); - smp_mb__after_atomic(); - wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); + if (atomic_dec_and_test(&cinfo->rpcs_out)) + wake_up_atomic_t(&cinfo->rpcs_out); } void nfs_commitdata_release(struct nfs_commit_data *data) @@ -1693,7 +1697,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, data->mds_ops, how, 0); out_bad: nfs_retry_commit(head, NULL, cinfo, 0); - cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } @@ -1755,8 +1758,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); nfs_init_cinfo(&cinfo, data->inode, data->dreq); - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) - nfs_commit_clear_lock(NFS_I(data->inode)); + nfs_commit_end(cinfo.mds); } static void nfs_commit_release(void *calldata) @@ -1775,7 +1777,6 @@ static const struct rpc_call_ops nfs_commit_ops = { static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .completion = nfs_commit_release_pages, - .error_cleanup = nfs_commit_clear_lock, }; int nfs_generic_commit_list(struct inode *inode, struct list_head *head, @@ -1794,30 +1795,25 @@ int nfs_commit_inode(struct inode *inode, int how) LIST_HEAD(head); struct nfs_commit_info cinfo; int may_wait = how & FLUSH_SYNC; + int error = 0; int res; - res = nfs_commit_set_lock(NFS_I(inode), may_wait); - if (res <= 0) - goto out_mark_dirty; nfs_init_cinfo_from_inode(&cinfo, inode); + nfs_commit_begin(cinfo.mds); res = nfs_scan_commit(inode, &head, &cinfo); - if (res) { - int error; - + if (res) error = nfs_generic_commit_list(inode, &head, how, &cinfo); - if (error < 0) - return error; - if (!may_wait) - goto out_mark_dirty; - error = wait_on_bit_action(&NFS_I(inode)->flags, - NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - if (error < 0) - return error; - } else - nfs_commit_clear_lock(NFS_I(inode)); + nfs_commit_end(cinfo.mds); + if (error < 0) + goto out_error; + if (!may_wait) + goto out_mark_dirty; + error = wait_on_commit(cinfo.mds); + if (error < 0) + return error; return res; +out_error: + res = error; /* Note: If we exit without ensuring that the commit is complete, * we must mark the inode as dirty. Otherwise, future calls to * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure -- cgit v1.2.3 From b20135d0b2431900a3a5395970ffb7e4f3767c8b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Dec 2015 09:28:06 -0500 Subject: NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid If the layout segment is invalid, then we should not be adding more write requests to the commit list. Instead, those writes should be replayed after requesting a new layout. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ae29f082c9c2..0aa8d6f23b4c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1676,6 +1676,13 @@ void nfs_retry_commit(struct list_head *page_list, } EXPORT_SYMBOL_GPL(nfs_retry_commit); +static void +nfs_commit_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) +{ + __set_page_dirty_nobuffers(req->wb_page); +} + /* * Commit dirty pages */ @@ -1777,6 +1784,7 @@ static const struct rpc_call_ops nfs_commit_ops = { static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .completion = nfs_commit_release_pages, + .resched_write = nfs_commit_resched_write, }; int nfs_generic_commit_list(struct inode *inode, struct list_head *head, @@ -1823,6 +1831,7 @@ out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return res; } +EXPORT_SYMBOL_GPL(nfs_commit_inode); int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { -- cgit v1.2.3 From 138a2935dc9783b131d9647c3bddb22ae5c84d77 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Oct 2015 17:17:06 -0400 Subject: NFS: Relax requirements in nfs_flush_incompatible If two processes share the same credentials and NFSv4 open stateid, then allow them both to dirty the same page, even if their nfs_open_context differs. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0aa8d6f23b4c..2c26e04d9396 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1130,7 +1130,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page) if (req == NULL) return 0; l_ctx = req->wb_lock_context; - do_flush = req->wb_page != page || req->wb_context != ctx; + do_flush = req->wb_page != page || + !nfs_match_open_context(req->wb_context, ctx); /* for now, flush if more than 1 request in page_group */ do_flush |= req->wb_this_page != req; if (l_ctx && flctx && -- cgit v1.2.3 From 210c7c1750fdf769647d1d526c9ea34c412c9eee Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 6 Jan 2016 10:40:18 -0500 Subject: NFS: Use wait_on_atomic_t() for unlock after readahead The use of wait_on_atomic_t() for waiting on I/O to complete before unlocking allows us to git rid of the NFS_IO_INPROGRESS flag, and thus the nfs_iocounter's flags member, and finally the nfs_iocounter altogether. The count of I/O is moved to the lock context, and the counter increment/decrement functions become simple enough to open-code. Signed-off-by: Benjamin Coddington [Trond: Fix up conflict with existing function nfs_wait_atomic_killable()] Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 94828b3f8c95..8ba4f717b413 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1565,14 +1565,6 @@ static void nfs_writeback_result(struct rpc_task *task, } } -static int nfs_wait_atomic_killable(atomic_t *key) -{ - if (fatal_signal_pending(current)) - return -ERESTARTSYS; - freezable_schedule_unsafe(); - return 0; -} - static int wait_on_commit(struct nfs_mds_commit_info *cinfo) { return wait_on_atomic_t(&cinfo->rpcs_out, -- cgit v1.2.3 From 6272dcc6beebbc2d8cf4165b628169e878f143e0 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 15 Jan 2016 16:54:15 -0500 Subject: NFS: Simplify nfs_request_add_commit_list() arguments I noticed that all the callers of this function pass cinfo->mds->list as an argument in addition to the cinfo structure itself. Let's get rid of the extra argument, since it doesn't seem to be adding anything. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ce43cd6d88c6..5754835a2886 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -830,11 +830,10 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); * holding the nfs_page lock. */ void -nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, - struct nfs_commit_info *cinfo) +nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) { spin_lock(cinfo->lock); - nfs_request_add_commit_list_locked(req, dst, cinfo); + nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); spin_unlock(cinfo->lock); nfs_mark_page_unstable(req->wb_page, cinfo); } @@ -892,7 +891,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, { if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx)) return; - nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); + nfs_request_add_commit_list(req, cinfo); } static void -- cgit v1.2.3