From 64aa7ed98db489d1c41ef140876ada38498678ab Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 09:55:50 +0900 Subject: f2fs: change get_new_data_page to pass a locked node page This patch is for passing a locked node page to get_dnode_of_data. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1cae864f8dfc..b8e34db37ae8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -387,7 +387,7 @@ static void fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi); ilock = mutex_lock_op(sbi); - page = get_new_data_page(inode, index, false); + page = get_new_data_page(inode, NULL, index, false); mutex_unlock_op(sbi, ilock); if (!IS_ERR(page)) { -- cgit v1.2.3 From 9851e6e18943f2537acb44a4eb51c6958e8dbc3e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 28 Apr 2013 09:04:18 +0900 Subject: f2fs: reorganize f2fs_vm_page_mkwrite Few things can be changed in the default mkwrite function 1) Make file_update_time at the start before acquiring any lock 2) the condition page_offset(page) >= i_size_read(inode) should be changed to page_offset(page) > i_size_read 3) Move wait_on_page_writeback. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b8e34db37ae8..9937ba1a4d24 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -63,9 +63,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_put_dnode(&dn); mutex_unlock_op(sbi, ilock); + file_update_time(vma->vm_file); lock_page(page); if (page->mapping != inode->i_mapping || - page_offset(page) >= i_size_read(inode) || + page_offset(page) > i_size_read(inode) || !PageUptodate(page)) { unlock_page(page); err = -EFAULT; @@ -76,10 +77,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto out; - - /* fill the page */ - wait_on_page_writeback(page); + goto mapped; /* page is wholly or partially inside EOF */ if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { @@ -90,7 +88,9 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, set_page_dirty(page); SetPageUptodate(page); - file_update_time(vma->vm_file); +mapped: + /* fill the page */ + wait_on_page_writeback(page); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(err); -- cgit v1.2.3 From 77888c1e42e8c76e16204cd99c19a01829421402 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 20:28:47 +0900 Subject: f2fs: add f2fs_readonly() Introduce a simple macro function for readability. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9937ba1a4d24..316bcfe80448 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -114,7 +114,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) .for_reclaim = 0, }; - if (inode->i_sb->s_flags & MS_RDONLY) + if (f2fs_readonly(inode->i_sb)) return 0; trace_f2fs_sync_file_enter(inode); -- cgit v1.2.3 From b292dcab068e141d8a820b77cbcc88d98c610eb4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 May 2013 08:02:02 +0900 Subject: f2fs: reuse the locked dnode page and its inode This patch fixes the following deadlock bug during the recovery. INFO: task mount:1322 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. mount D ffffffff81125870 0 1322 1266 0x00000000 ffff8801207e39d8 0000000000000046 ffff88012ab1dee0 0000000000000046 ffff8801207e3a08 ffff880115903f40 ffff8801207e3fd8 ffff8801207e3fd8 ffff8801207e3fd8 ffff880115903f40 ffff8801207e39d8 ffff88012fc94520 Call Trace: [] ? __lock_page+0x70/0x70 [] schedule+0x29/0x70 [] io_schedule+0x8f/0xd0 [] sleep_on_page+0xe/0x20 [] __wait_on_bit_lock+0x5a/0xc0 [] __lock_page+0x67/0x70 [] ? autoremove_wake_function+0x40/0x40 [] find_lock_page+0x67/0x80 [] find_or_create_page+0x3f/0xb0 [] ? sync_inode_page+0xa8/0xd0 [f2fs] [] get_node_page+0x67/0x180 [f2fs] [] recover_fsync_data+0xacb/0xff0 [f2fs] [] ? _raw_spin_unlock+0x3e/0x40 [] f2fs_fill_super+0x7d4/0x850 [f2fs] [] mount_bdev+0x1c9/0x210 [] ? validate_superblock+0x180/0x180 [f2fs] [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x43/0x1b0 [] ? __alloc_percpu+0x10/0x20 [] vfs_kern_mount+0x76/0x120 [] do_mount+0x237/0xa10 [] ? strndup_user+0x5b/0x80 [] SyS_mount+0x90/0xe0 [] system_call_fastpath+0x16/0x1b The bug is triggered when check_index_in_prev_nodes tries to get the direct node page by calling get_node_page. At this point, if the direct node page is already locked by get_dnode_of_data, its caller, we got a deadlock condition. This patch adds additional condition check for the reuse of locked direct node pages prior to the get_node_page call. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 316bcfe80448..deefd258b847 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -168,7 +168,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +int truncate_data_blocks_range(struct dnode_of_data *dn, int count) { int nr_free = 0, ofs = dn->ofs_in_node; struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); -- cgit v1.2.3 From 2d4d9fb591fe83d9f0559afaa9736ebc8edad0aa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Jun 2013 16:33:07 +0900 Subject: f2fs: fix i_blocks translation on various types of files Basically an inode manages the number of allocated blocks with inode->i_blocks which is represented in a unit of sectors, not file system blocks. But, f2fs has used i_blocks in a unit of file system blocks, and f2fs_getattr translates it to the number of sectors when fstat is called. However, previously f2fs_file_inode_operations only has this, so this patch adds it to all the types of inode_operations. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index deefd258b847..8d2fce9e7e41 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -291,7 +291,7 @@ void f2fs_truncate(struct inode *inode) } } -static int f2fs_getattr(struct vfsmount *mnt, +int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; -- cgit v1.2.3 From 6a3e8ef0de1e548d1cf9bcf51d9b7b6f4141fec5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 8 Jun 2013 21:25:28 +0900 Subject: f2fs: use the F2FS specific flags in f2fs_ioctl() In f2fs_ioctl() function, it is using generic flags. Since F2FS specific flags are defined. So lets use those flags. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8d2fce9e7e41..85b665d176a2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -575,10 +575,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) int ret; switch (cmd) { - case FS_IOC_GETFLAGS: + case F2FS_IOC_GETFLAGS: flags = fi->i_flags & FS_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); - case FS_IOC_SETFLAGS: + case F2FS_IOC_SETFLAGS: { unsigned int oldflags; -- cgit v1.2.3 From d7cc950b4c910e4440485be784493880a0d09086 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 8 Jun 2013 21:25:40 +0900 Subject: f2fs: optimise the truncate_data_blocks_range() range The function truncate_data_blocks_range() decrements the valid block count of inode via dec_valid_block_count(). Since this function updates the i_blocks field of inode, we can update this field once we have calculated total the number of blocks to be freed. Therefore we can decrement valid blocks outside of the for loop. if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); set_page_dirty(dn->node_page); sync_inode_page(dn); } 'nr_free' tells the total number of blocks freed. So, we can just directly pass this value to dec_valid_block_count() and update the i_blocks. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 85b665d176a2..2f649b848521 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -185,10 +185,10 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) update_extent_cache(NULL_ADDR, dn); invalidate_blocks(sbi, blkaddr); - dec_valid_block_count(sbi, dn->inode, 1); nr_free++; } if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); set_page_dirty(dn->node_page); sync_inode_page(dn); } -- cgit v1.2.3 From b3783873cc2214542d3da9a1aa800b20919d5889 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 10 Jun 2013 09:17:01 +0900 Subject: f2fs: avoid freqeunt write_inode calls If update_inode is called, we don't need to do write_inode. So, let's use a *dirty* flag for each inode. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2f649b848521..fda226ff1849 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -147,6 +147,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } else { /* if there is no written node page, write its inode page */ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { + mark_inode_dirty_sync(inode); ret = f2fs_write_inode(inode, NULL); if (ret) goto out; -- cgit v1.2.3 From 354a3399dc6f7e556d04e1c731cd50e08eeb44bd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 14 Jun 2013 08:52:35 +0900 Subject: f2fs: recover wrong pino after checkpoint during fsync If a file is linked, f2fs loose its parent inode number so that fsync calls for the linked file should do checkpoint all the time. But, if we can recover its parent inode number after the checkpoint, we can adjust roll-forward mechanism for the further fsync calls, which is able to improve the fsync performance significatly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fda226ff1849..d2d2b7dbdcc1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -102,6 +102,24 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { .remap_pages = generic_file_remap_pages, }; +static int get_parent_ino(struct inode *inode, nid_t *pino) +{ + struct dentry *dentry; + + inode = igrab(inode); + dentry = d_find_any_alias(inode); + iput(inode); + if (!dentry) + return 0; + + inode = igrab(dentry->d_parent->d_inode); + dput(dentry); + + *pino = inode->i_ino; + iput(inode); + return 1; +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -134,7 +152,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; - else if (is_cp_file(inode)) + else if (file_wrong_pino(inode)) need_cp = true; else if (!space_for_roll_forward(sbi)) need_cp = true; @@ -142,8 +160,19 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) need_cp = true; if (need_cp) { + nid_t pino; + /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + F2FS_I(inode)->i_pino = pino; + file_got_pino(inode); + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } } else { /* if there is no written node page, write its inode page */ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { -- cgit v1.2.3