From a8855990e382fc81c04187c5fdb48743307baf32 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 9 Jul 2013 22:36:45 +0800 Subject: writeback: Do not sort b_io list only because of block device inode It is very likely that block device inode will be part of BDI dirty list as well. However it doesn't make sence to sort inodes on the b_io list just because of this inode (as it contains buffers all over the device anyway). So save some CPU cycles which is valuable since we hold relatively contented wb->list_lock. Signed-off-by: Jan Kara --- fs/fs-writeback.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3be57189efd5..cabdece4cb39 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -68,7 +68,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) { struct super_block *sb = inode->i_sb; - if (strcmp(sb->s_type->name, "bdev") == 0) + if (sb_is_blkdev_sb(sb)) return inode->i_mapping->backing_dev_info; return sb->s_bdi; @@ -250,11 +250,13 @@ static int move_expired_inodes(struct list_head *delaying_queue, if (work->older_than_this && inode_dirtied_after(inode, *work->older_than_this)) break; + list_move(&inode->i_wb_list, &tmp); + moved++; + if (sb_is_blkdev_sb(inode->i_sb)) + continue; if (sb && sb != inode->i_sb) do_sb_sort = 1; sb = inode->i_sb; - list_move(&inode->i_wb_list, &tmp); - moved++; } /* just one sb in list, splice to dispatch_queue and we're done */ -- cgit v1.2.3 From 47df3ddedd22c3f8e68aff831edb7921937674a2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 Sep 2013 14:22:22 -0700 Subject: writeback: fix occasional slow sync(1) In case when system contains no dirty pages, wakeup_flusher_threads() will submit WB_SYNC_NONE writeback for 0 pages so wb_writeback() exits immediately without doing anything, even though there are dirty inodes in the system. Thus sync(1) will write all the dirty inodes from a WB_SYNC_ALL writeback pass which is slow. Fix the problem by using get_nr_dirty_pages() in wakeup_flusher_threads() instead of calculating number of dirty pages manually. That function also takes number of dirty inodes into account. Signed-off-by: Jan Kara Reported-by: Paul Taysom Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 68851ff2fd41..87d778118027 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1049,10 +1049,8 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) { struct backing_dev_info *bdi; - if (!nr_pages) { - nr_pages = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - } + if (!nr_pages) + nr_pages = get_nr_dirty_pages(); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { -- cgit v1.2.3 From 7d9f073b8da45a894bb7148433bd84d21eed6757 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 11 Sep 2013 14:22:40 -0700 Subject: mm/writeback: make writeback_inodes_wb static It's not used globally and could be static. Signed-off-by: Wanpeng Li Cc: Dave Hansen Cc: Rik van Riel Cc: Fengguang Wu Cc: Joonsoo Kim Cc: Johannes Weiner Cc: Tejun Heo Cc: Yasuaki Ishimatsu Cc: David Rientjes Cc: KOSAKI Motohiro Cc: Jiri Kosina Cc: Wanpeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 87d778118027..54b3c31c2f0d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -723,7 +723,7 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb, return wrote; } -long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, +static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason) { struct wb_writeback_work work = { -- cgit v1.2.3 From 146d7009b45cdb45ec3be8ad73177dae58f4bc91 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 11 Sep 2013 14:23:04 -0700 Subject: writeback: fix race that cause writeback hung There is a race between mark inode dirty and writeback thread, see the following scenario. In this case, writeback thread will not run though there is dirty_io. __mark_inode_dirty() bdi_writeback_workfn() ... ... spin_lock(&inode->i_lock); ... if (bdi_cap_writeback_dirty(bdi)) { <<< assume wb has dirty_io, so wakeup_bdi is false. <<< the following inode_dirty also have wakeup_bdi false. if (!wb_has_dirty_io(&bdi->wb)) wakeup_bdi = true; } spin_unlock(&inode->i_lock); <<< assume last dirty_io is removed here. pages_written = wb_do_writeback(wb); ... <<< work_list empty and wb has no dirty_io, <<< delayed_work will not be queued. if (!list_empty(&bdi->work_list) || (wb_has_dirty_io(wb) && dirty_writeback_interval)) queue_delayed_work(bdi_wq, &wb->dwork, msecs_to_jiffies(dirty_writeback_interval * 10)); spin_lock(&bdi->wb.list_lock); inode->dirtied_when = jiffies; <<< new dirty_io is added. list_move(&inode->i_wb_list, &bdi->wb.b_dirty); spin_unlock(&bdi->wb.list_lock); <<< though there is dirty_io, but wakeup_bdi is false, <<< so writeback thread will not be waked up and <<< the new dirty_io will not be flushed. if (wakeup_bdi) bdi_wakeup_thread_delayed(bdi); Writeback will run until there is a new flush work queued. This may cause a lot of dirty pages stay in memory for a long time. Signed-off-by: Junxiao Bi Reviewed-by: Jan Kara Cc: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 54b3c31c2f0d..30f6f27d5a59 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1171,6 +1171,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) bool wakeup_bdi = false; bdi = inode_to_bdi(inode); + spin_unlock(&inode->i_lock); + spin_lock(&bdi->wb.list_lock); if (bdi_cap_writeback_dirty(bdi)) { WARN(!test_bit(BDI_registered, &bdi->state), "bdi-%s not registered\n", bdi->name); @@ -1185,8 +1187,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) wakeup_bdi = true; } - spin_unlock(&inode->i_lock); - spin_lock(&bdi->wb.list_lock); inode->dirtied_when = jiffies; list_move(&inode->i_wb_list, &bdi->wb.b_dirty); spin_unlock(&bdi->wb.list_lock); -- cgit v1.2.3