From f0059afd3e6e7aa1a0ffc23468b74c43d47660b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:06:35 -0800 Subject: buffer: make touch_buffer() an exported function We want to add a trace point to touch_buffer() but macros and inline functions defined in header files can't have tracing points. Move touch_buffer() to fs/buffer.c and make it a proper function. The new exported function is also declared inline. As most uses of touch_buffer() are inside buffer.c with nilfs2 as the only other user, the effect of this change should be negligible. Signed-off-by: Tejun Heo Cc: Steven Rostedt Signed-off-by: Jens Axboe --- fs/buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index c017a2dfb909..a8c2dfb68dcd 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -53,6 +53,12 @@ void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) } EXPORT_SYMBOL(init_buffer); +inline void touch_buffer(struct buffer_head *bh) +{ + mark_page_accessed(bh->b_page); +} +EXPORT_SYMBOL(touch_buffer); + static int sleep_on_buffer(void *word) { io_schedule(); -- cgit v1.2.3 From 5305cb830834549b9203ad4d009ad5483c5e293f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:06:36 -0800 Subject: block: add block_{touch|dirty}_buffer tracepoint The former is triggered from touch_buffer() and the latter mark_buffer_dirty(). This is part of tracepoint additions to improve visiblity into dirtying / writeback operations for io tracer and userland. v2: Transformed writeback_dirty_buffer to block_dirty_buffer and made it share TP definition with block_touch_buffer. Signed-off-by: Tejun Heo Cc: Fengguang Wu Signed-off-by: Jens Axboe --- fs/buffer.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index a8c2dfb68dcd..87ff335fbbe3 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -41,6 +41,7 @@ #include #include #include +#include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -55,6 +56,7 @@ EXPORT_SYMBOL(init_buffer); inline void touch_buffer(struct buffer_head *bh) { + trace_block_touch_buffer(bh); mark_page_accessed(bh->b_page); } EXPORT_SYMBOL(touch_buffer); @@ -1119,6 +1121,8 @@ void mark_buffer_dirty(struct buffer_head *bh) { WARN_ON_ONCE(!buffer_uptodate(bh)); + trace_block_dirty_buffer(bh); + /* * Very *carefully* optimize the it-is-already-dirty case. * -- cgit v1.2.3 From 1d1d1a767206fbe5d4c69493b7e6d2a8d08cc0a0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Feb 2013 16:42:51 -0800 Subject: mm: only enforce stable page writes if the backing device requires it Create a helper function to check if a backing device requires stable page writes and, if so, performs the necessary wait. Then, make it so that all points in the memory manager that handle making pages writable use the helper function. This should provide stable page write support to most filesystems, while eliminating unnecessary waiting for devices that don't require the feature. Before this patchset, all filesystems would block, regardless of whether or not it was necessary. ext3 would wait, but still generate occasional checksum errors. The network filesystems were left to do their own thing, so they'd wait too. After this patchset, all the disk filesystems except ext3 and btrfs will wait only if the hardware requires it. ext3 (if necessary) snapshots pages instead of blocking, and btrfs provides its own bdi so the mm will never wait. Network filesystems haven't been touched, so either they provide their own stable page guarantees or they don't block at all. The blocking behavior is back to what it was before 3.0 if you don't have a disk requiring stable page writes. Here's the result of using dbench to test latency on ext2: 3.8.0-rc3: Operation Count AvgLat MaxLat ---------------------------------------- WriteX 109347 0.028 59.817 ReadX 347180 0.004 3.391 Flush 15514 29.828 287.283 Throughput 57.429 MB/sec 4 clients 4 procs max_latency=287.290 ms 3.8.0-rc3 + patches: WriteX 105556 0.029 4.273 ReadX 335004 0.005 4.112 Flush 14982 30.540 298.634 Throughput 55.4496 MB/sec 4 clients 4 procs max_latency=298.650 ms As you can see, the maximum write latency drops considerably with this patch enabled. The other filesystems (ext3/ext4/xfs/btrfs) behave similarly, but see the cover letter for those results. Signed-off-by: Darrick J. Wong Acked-by: Steven Whitehouse Reviewed-by: Jan Kara Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Artem Bityutskiy Cc: Joel Becker Cc: Mark Fasheh Cc: Jens Axboe Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 7a75c3e0fd58..2ea9cd44aeae 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2359,7 +2359,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (unlikely(ret < 0)) goto out_unlock; set_page_dirty(page); - wait_on_page_writeback(page); + wait_for_stable_page(page); return 0; out_unlock: unlock_page(page); -- cgit v1.2.3 From 496ad9aa8ef448058e36ca7a787c61f2e63f0f54 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Jan 2013 17:07:38 -0500 Subject: new helper: file_inode(file) Signed-off-by: Al Viro --- fs/buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 7a75c3e0fd58..b8a8b4d64d8c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2332,7 +2332,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); unsigned long end; loff_t size; int ret; @@ -2371,7 +2371,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { int ret; - struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; + struct super_block *sb = file_inode(vma->vm_file)->i_sb; sb_start_pagefault(sb); -- cgit v1.2.3 From 43be594a6b0ce34ab5fa3b13e08727a99fb95b91 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Fri, 22 Feb 2013 16:35:46 -0800 Subject: fs/buffer.c: change type of max_buffer_heads to unsigned long max_buffer_heads is calculated from nr_free_buffer_pages(), so change its type to unsigned long in case of overflow. Signed-off-by: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 2ea9cd44aeae..62169c192c21 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3227,7 +3227,7 @@ static struct kmem_cache *bh_cachep __read_mostly; * Once the number of bh's in the machine exceeds this level, we start * stripping them in writeback. */ -static int max_buffer_heads; +static unsigned long max_buffer_heads; int buffer_heads_over_limit; @@ -3343,7 +3343,7 @@ EXPORT_SYMBOL(bh_submit_read); void __init buffer_init(void) { - int nrpages; + unsigned long nrpages; bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, -- cgit v1.2.3