From 7747bd4bceb3079572695d3942294a6c7b265557 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 2 Jul 2013 22:38:35 +1000 Subject: sync: don't block the flusher thread waiting on IO When sync does its WB_SYNC_ALL writeback, it issues data IO and then immediately waits for IO completion. This is done in the context of the flusher thread, and hence completely ties up the flusher thread for the backing device until all the dirty inodes have been synced. On filesystems that are dirtying inodes constantly and quickly, this means the flusher thread can be tied up for minutes per sync call and hence badly affect system level write IO performance as the page cache cannot be cleaned quickly. We already have a wait loop for IO completion for sync(2), so cut this out of the flusher thread and delegate it to wait_sb_inodes(). Hence we can do rapid IO submission, and then wait for it all to complete. Effect of sync on fsmark before the patch: FSUse% Count Size Files/sec App Overhead ..... 0 640000 4096 35154.6 1026984 0 720000 4096 36740.3 1023844 0 800000 4096 36184.6 916599 0 880000 4096 1282.7 1054367 0 960000 4096 3951.3 918773 0 1040000 4096 40646.2 996448 0 1120000 4096 43610.1 895647 0 1200000 4096 40333.1 921048 And a single sync pass took: real 0m52.407s user 0m0.000s sys 0m0.090s After the patch, there is no impact on fsmark results, and each individual sync(2) operation run concurrently with the same fsmark workload takes roughly 7s: real 0m6.930s user 0m0.000s sys 0m0.039s IOWs, sync is 7-8x faster on a busy filesystem and does not have an adverse impact on ongoing async data write operations. 
Signed-off-by: Dave Chinner Reviewed-by: Jan Kara Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3be57189efd5..a85ac4e33436 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -45,6 +45,7 @@ struct wb_writeback_work { unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; + unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ enum wb_reason reason; /* why was writeback initiated? */ struct list_head list; /* pending work list */ @@ -443,9 +444,11 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) /* * Make sure to wait on the data before writing out the metadata. * This is important for filesystems that modify metadata on data - * I/O completion. + * I/O completion. We don't do it for sync(2) writeback because it has a + * separate, external IO completion path and ->sync_fs for guaranteeing + * inode metadata is written back correctly. */ - if (wbc->sync_mode == WB_SYNC_ALL) { + if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) { int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; @@ -578,6 +581,7 @@ static long writeback_sb_inodes(struct super_block *sb, .tagged_writepages = work->tagged_writepages, .for_kupdate = work->for_kupdate, .for_background = work->for_background, + .for_sync = work->for_sync, .range_cyclic = work->range_cyclic, .range_start = 0, .range_end = LLONG_MAX, @@ -1362,6 +1366,7 @@ void sync_inodes_sb(struct super_block *sb) .range_cyclic = 0, .done = &done, .reason = WB_REASON_SYNC, + .for_sync = 1, }; /* Nothing to do? */ -- cgit v1.2.3 From 12057841008534236e52df3d3e63e089f27c5406 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Mon, 8 Jul 2013 16:00:11 -0700 Subject: fs/fs-writeback.c: make wb_do_writeback() static It's not used globally and could be static. 
Signed-off-by: Haicheng Li Cc: Jan Kara Cc: Wu Fengguang Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index a85ac4e33436..aca8835c8c17 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -963,7 +963,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) /* * Retrieve work items and do the writeback they describe */ -long wb_do_writeback(struct bdi_writeback *wb, int force_wait) +static long wb_do_writeback(struct bdi_writeback *wb, int force_wait) { struct backing_dev_info *bdi = wb->bdi; struct wb_writeback_work *work; -- cgit v1.2.3 From 25d130ba22362757a90135fd8a0f75cc7fc71e79 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 8 Jul 2013 16:00:14 -0700 Subject: mm/writeback: don't check force_wait to handle bdi->work_list After commit 839a8e8660b6 ("writeback: replace custom worker pool implementation with unbound workqueue"), bdi_writeback_workfn runs off bdi_writeback->dwork. On each execution, it processes bdi->work_list and reschedules if there are more things to do, instead of flushing any work that raced with us exiting. It is unnecessary to check force_wait in wb_do_writeback since it is always 0 after the mentioned commit. This patch removes the force_wait parameter from wb_do_writeback. 
Signed-off-by: Wanpeng Li Reviewed-by: Tejun Heo Reviewed-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index aca8835c8c17..68851ff2fd41 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -963,7 +963,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) /* * Retrieve work items and do the writeback they describe */ -static long wb_do_writeback(struct bdi_writeback *wb, int force_wait) +static long wb_do_writeback(struct bdi_writeback *wb) { struct backing_dev_info *bdi = wb->bdi; struct wb_writeback_work *work; @@ -971,12 +971,6 @@ static long wb_do_writeback(struct bdi_writeback *wb, int force_wait) set_bit(BDI_writeback_running, &wb->bdi->state); while ((work = get_next_work_item(bdi)) != NULL) { - /* - * Override sync mode, in case we must wait for completion - * because this thread is exiting now. - */ - if (force_wait) - work->sync_mode = WB_SYNC_ALL; trace_writeback_exec(bdi, work); @@ -1025,7 +1019,7 @@ void bdi_writeback_workfn(struct work_struct *work) * rescuer as work_list needs to be drained. */ do { - pages_written = wb_do_writeback(wb, 0); + pages_written = wb_do_writeback(wb); trace_writeback_pages_written(pages_written); } while (!list_empty(&bdi->work_list)); } else { -- cgit v1.2.3