From d47992f86b307985b3215bcf141d56d1849d71df Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 21 May 2013 23:17:23 -0400 Subject: mm: change invalidatepage prototype to accept length Currently there is no way to truncate partial page where the end truncate point is not at the end of the page. This is because it was not needed and the functionality was enough for file system truncate operation to work properly. However more file systems now support punch hole feature and it can benefit from mm supporting truncating page just up to the certain point. Specifically, with this functionality truncate_inode_pages_range() can be changed so it supports truncating partial page at the end of the range (currently it will BUG_ON() if 'end' is not at the end of the page). This commit changes the invalidatepage() address space operation prototype to accept range to be invalidated and update all the instances for it. We also change the block_invalidatepage() in the same way and actually make a use of the new length argument implementing range invalidation. Actual file system implementations will follow except the file systems where the changes are really simple and should not change the behaviour in any way .Implementation for truncate_page_range() which will be able to accept page unaligned ranges will follow as well. Signed-off-by: Lukas Czerner Cc: Andrew Morton Cc: Hugh Dickins --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 43db02e9c9fa..9f696014988d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -364,7 +364,7 @@ struct address_space_operations { /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); - void (*invalidatepage) (struct page *, unsigned long); + void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, -- cgit v1.2.3 From 5c0ba4e0762e6dabd14a5c276652e2defec38de7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 May 2013 13:52:59 -0400 Subject: [readdir] introduce iterate_dir() and dir_context iterate_dir(): new helper, replacing vfs_readdir(). struct dir_context: contains the readdir callback (and will get more stuff in it), embedded into whatever data that callback wants to deal with; eventually, we'll be passing it to ->readdir() replacement instead of (data,filldir) pair. Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 65c2be22b601..643e5b6cbaf5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1506,6 +1506,9 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); * to have different dirent layouts depending on the binary type. */ typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); +struct dir_context { + filldir_t actor; +}; struct block_device_operations; /* These macros are for out of kernel modules to test that @@ -2494,6 +2497,7 @@ loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); extern int vfs_readdir(struct file *, filldir_t, void *); +extern int iterate_dir(struct file *, struct dir_context *); extern int vfs_stat(const char __user *, struct kstat *); extern int vfs_lstat(const char __user *, struct kstat *); -- cgit v1.2.3 From bb6f619b3a49f940d7478112500da312d70866eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 May 2013 18:49:12 -0400 Subject: [readdir] introduce ->iterate(), ctx->pos, dir_emit() New method - ->iterate(file, ctx). That's the replacement for ->readdir(); it takes callback from ctx->actor, uses ctx->pos instead of file->f_pos and calls dir_emit(ctx, ...) instead of filldir(data, ...). It does *not* update file->f_pos (or look at it, for that matter); iterate_dir() does the update. Note that dir_emit() takes the offset from ctx->pos (and eventually filldir_t will lose that argument). Signed-off-by: Al Viro --- include/linux/fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 643e5b6cbaf5..b9641ae68da8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1508,7 +1508,15 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); struct dir_context { filldir_t actor; + loff_t pos; }; + +static inline bool dir_emit(struct dir_context *ctx, + const char *name, int namelen, + u64 ino, unsigned type) +{ + return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; +} struct block_device_operations; /* These macros are for out of kernel modules to test that @@ -1525,6 +1533,7 @@ struct file_operations { ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); + int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); -- cgit v1.2.3 From 5f99f4e79abc64ed9d93a4b0158b21c64ff7f478 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 May 2013 20:23:06 -0400 Subject: [readdir] switch dcache_readdir() users to ->iterate() new helpers - dir_emit_dot(file, ctx, dentry), dir_emit_dotdot(file, ctx), dir_emit_dots(file, ctx). Signed-off-by: Al Viro --- include/linux/fs.h | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index b9641ae68da8..40293a6ce804 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1511,12 +1511,6 @@ struct dir_context { loff_t pos; }; -static inline bool dir_emit(struct dir_context *ctx, - const char *name, int namelen, - u64 ino, unsigned type) -{ - return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; -} struct block_device_operations; /* These macros are for out of kernel modules to test that @@ -2537,7 +2531,7 @@ extern void iterate_supers_type(struct file_system_type *, extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); -extern int dcache_readdir(struct file *, void *, filldir_t); +extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct dentry *, struct iattr *); extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int simple_statfs(struct dentry *, struct kstatfs *); @@ -2701,4 +2695,35 @@ static inline void inode_has_no_xattr(struct inode *inode) inode->i_flags |= S_NOSEC; } +static inline bool dir_emit(struct dir_context *ctx, + const char *name, int namelen, + u64 ino, unsigned type) +{ + return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; +} +static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) +{ + return ctx->actor(ctx, ".", 1, ctx->pos, + file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0; +} +static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) +{ + return ctx->actor(ctx, "..", 2, ctx->pos, + parent_ino(file->f_path.dentry), DT_DIR) == 0; +} +static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) +{ + if (ctx->pos == 0) { + if (!dir_emit_dot(file, ctx)) + return false; + ctx->pos = 1; + } + if (ctx->pos == 1) { + if (!dir_emit_dotdot(file, ctx)) + return false; + ctx->pos = 2; + } + return true; +} + #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From 5ded75ec4c576577cae7d556e671d11d0c80c2fc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 May 2013 21:02:48 -0400 Subject: [readdir] convert ext3 new helper: dir_relax(inode). Call when you are in location that will _not_ be invalidated by directory modifications (block boundary, in case of ext*). Returns whether the directory has survived (dropping i_mutex allows rmdir to kill the sucker; if it returns false to us, ->iterate() is obviously done) Signed-off-by: Al Viro --- include/linux/fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 40293a6ce804..aa9770c7e8df 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2725,5 +2725,11 @@ static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) } return true; } +static inline bool dir_relax(struct inode *inode) +{ + mutex_unlock(&inode->i_mutex); + mutex_lock(&inode->i_mutex); + return !IS_DEADDIR(inode); +} #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From 2233f31aade393641f0eaed43a71110e629bb900 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 22 May 2013 21:44:23 -0400 Subject: [readdir] ->readdir() is gone everything's converted to ->iterate() Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index aa9770c7e8df..237af62976a6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1526,7 +1526,6 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); - int (*readdir) (struct file *, void *, filldir_t); int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); -- cgit v1.2.3 From ac6614b76478e68173ccf7ad4e9e98035cc9c21d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 22 May 2013 22:22:04 -0400 Subject: [readdir] constify ->actor Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 237af62976a6..7c30e3a62baf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1507,7 +1507,7 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); */ typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); struct dir_context { - filldir_t actor; + const filldir_t actor; loff_t pos; }; -- cgit v1.2.3 From 60545d0d4610b02e55f65d141c95b18ccf855b6e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Jun 2013 01:20:27 -0400 Subject: [O_TMPFILE] it's still short a few helpers, but infrastructure should be OK now... Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c30e3a62baf..dd6615f0fd13 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1580,6 +1580,7 @@ struct inode_operations { int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode, int *opened); + int (*tmpfile) (struct inode *, struct dentry *, umode_t); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, -- cgit v1.2.3 From f4e0c30c191f87851c4a53454abb55ee276f4a7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Jun 2013 08:34:36 +0400 Subject: allow the temp files created by open() to be linked to O_TMPFILE | O_CREAT => linkat() with AT_SYMLINK_FOLLOW and /proc/self/fd/ as oldpath (i.e. flink()) will create a link O_TMPFILE | O_CREAT | O_EXCL => ENOENT on attempt to link those guys Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index dd6615f0fd13..ab11c44b0697 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1744,6 +1744,7 @@ struct super_operations { #define I_REFERENCED (1 << 8) #define __I_DIO_WAKEUP 9 #define I_DIO_WAKEUP (1 << I_DIO_WAKEUP) +#define I_LINKABLE (1 << 10) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) -- cgit v1.2.3 From 0b3fca1fd1499f0f5a7486d494f96538f2b7e5b9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Jun 2013 11:37:47 +0400 Subject: kill find_inode_number() the only remaining caller (in ncpfs) is guaranteed to return 0 - we only hit it if we'd just checked that there's no dentry with such name. Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index ab11c44b0697..1db01c13ddce 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2311,7 +2311,6 @@ extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); extern int path_is_under(struct path *, struct path *); -extern ino_t find_inode_number(struct dentry *, struct qstr *); #include -- cgit v1.2.3 From 1bf9d14dff4a2c4de6152c6f751bdaf6896b68bb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jun 2013 20:27:42 +0400 Subject: new helper: fixed_size_llseek() Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1db01c13ddce..803b7fa2520a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2428,6 +2428,8 @@ extern loff_t no_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); +extern loff_t fixed_size_llseek(struct file *file, loff_t offset, + int whence, loff_t size); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -- cgit v1.2.3 From 68d70d03f8f5bd10a0e7337210b13f536fd4aeb9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 19 Jun 2013 15:26:04 +0400 Subject: constify rw_verify_area() Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803b7fa2520a..68f10204ab29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1898,7 +1898,6 @@ extern int current_umask(void); extern struct kobject *fs_kobj; #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) -extern int rw_verify_area(int, struct file *, loff_t *, size_t); #define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_WRITE 2 -- cgit v1.2.3 From f891a29f46553a384edbaa0f6ac446b1d03bccac Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:09 -0400 Subject: locks: drop the unused filp argument to posix_unblock_lock Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 68f10204ab29..172303655702 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -994,7 +994,7 @@ extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); -extern int posix_unblock_lock(struct file *, struct file_lock *); +extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); @@ -1084,8 +1084,7 @@ static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) return -ENOLCK; } -static inline int posix_unblock_lock(struct file *filp, - struct file_lock *waiter) +static inline int posix_unblock_lock(struct file_lock *waiter) { return -ENOENT; } -- cgit v1.2.3 From 1a9e64a7118c5ad13dd5119da18375a5bd45b330 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:10 -0400 Subject: cifs: use posix_unblock_lock instead of locks_delete_block commit 66189be74 (CIFS: Fix VFS lock usage for oplocked files) exported the locks_delete_block symbol. There's already an exported helper function that provides this capability however, so make cifs use that instead and turn locks_delete_block back into a static function. Note that if fl->fl_next == NULL then this lock has already been through locks_delete_block(), so we should be OK to ignore an ENOENT error here and simply not retry the lock. Cc: Pavel Shilovsky Signed-off-by: Jeff Layton Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 172303655702..6cfc9a29a783 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1006,7 +1006,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); -extern void locks_delete_block(struct file_lock *waiter); extern void lock_flocks(void); extern void unlock_flocks(void); #else /* !CONFIG_FILE_LOCKING */ @@ -1150,10 +1149,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start, return 1; } -static inline void locks_delete_block(struct file_lock *waiter) -{ -} - static inline void lock_flocks(void) { } -- cgit v1.2.3 From 1cb360125966cb6cb594e414ea80a0154617b846 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:12 -0400 Subject: locks: comment cleanups and clarifications Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/fs.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6cfc9a29a783..ed9fdaaf3223 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -926,6 +926,24 @@ int locks_in_grace(struct net *); /* that will die - we need it for nfs_lock_info */ #include +/* + * struct file_lock represents a generic "file lock". It's used to represent + * POSIX byte range locks, BSD (flock) locks, and leases. It's important to + * note that the same struct is used to represent both a request for a lock and + * the lock itself, but the same object is never used for both. + * + * FIXME: should we create a separate "struct lock_request" to help distinguish + * these two uses? + * + * The i_flock list is ordered by: + * + * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX + * 2) lock owner + * 3) lock range start + * 4) lock range end + * + * Obviously, the last two criteria only matter for POSIX locks. + */ struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ struct list_head fl_link; /* doubly linked list of all locks */ -- cgit v1.2.3 From 1c8c601a8c0dc59fe64907dcd9d512a3d181ddc7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:15 -0400 Subject: locks: protect most of the file_lock handling with i_lock Having a global lock that protects all of this code is a clear scalability problem. Instead of doing that, move most of the code to be protected by the i_lock instead. The exceptions are the global lists that the ->fl_link sits on, and the ->fl_block list. ->fl_link is what connects these structures to the global lists, so we must ensure that we hold those locks when iterating over or updating these lists. Furthermore, sound deadlock detection requires that we hold the blocked_list state steady while checking for loops. We also must ensure that the search and update to the list are atomic. For the checking and insertion side of the blocked_list, push the acquisition of the global lock into __posix_lock_file and ensure that checking and update of the blocked_list is done without dropping the lock in between. On the removal side, when waking up blocked lock waiters, take the global lock before walking the blocked list and dequeue the waiters from the global list prior to removal from the fl_block list. With this, deadlock detection should be race free while we minimize excessive file_lock_lock thrashing. Finally, in order to avoid a lock inversion problem when handling /proc/locks output we must ensure that manipulations of the fl_block list are also protected by the file_lock_lock. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/fs.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index ed9fdaaf3223..24fe998795e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1024,8 +1024,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); -extern void lock_flocks(void); -extern void unlock_flocks(void); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, struct flock __user *user) { @@ -1166,15 +1164,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start, { return 1; } - -static inline void lock_flocks(void) -{ -} - -static inline void unlock_flocks(void) -{ -} - #endif /* !CONFIG_FILE_LOCKING */ -- cgit v1.2.3 From 139ca04ee572fea6c0c105e88aba3a534efcd7c4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:17 -0400 Subject: locks: convert fl_link to a hlist_node Testing has shown that iterating over the blocked_list for deadlock detection turns out to be a bottleneck. In order to alleviate that, begin the process of turning it into a hashtable. We start by turning the fl_link into a hlist_node and the global lists into hlists. A later patch will do the conversion of the blocked_list to a hashtable. Signed-off-by: Jeff Layton Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 24fe998795e1..fab064a3b65f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -946,7 +946,7 @@ int locks_in_grace(struct net *); */ struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ - struct list_head fl_link; /* doubly linked list of all locks */ + struct hlist_node fl_link; /* node in global lists */ struct list_head fl_block; /* circular list of blocked processes */ fl_owner_t fl_owner; unsigned int fl_flags; -- cgit v1.2.3 From 3999e49364193f7dbbba66e2be655fe91ba1fced Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:19 -0400 Subject: locks: add a new "lm_owner_key" lock operation Currently, the hashing that the locking code uses to add these values to the blocked_hash is simply calculated using fl_owner field. That's valid in most cases except for server-side lockd, which validates the owner of a lock based on fl_owner and fl_pid. In the case where you have a small number of NFS clients doing a lot of locking between different processes, you could end up with all the blocked requests sitting in a very small number of hash buckets. Add a new lm_owner_key operation to the lock_manager_operations that will generate an unsigned long to use as the key in the hashtable. That function is only implemented for server-side lockd, and simply XORs the fl_owner and fl_pid. Signed-off-by: Jeff Layton Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index fab064a3b65f..a137a73fc1fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -908,6 +908,7 @@ struct file_lock_operations { struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); + unsigned long (*lm_owner_key)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, struct file_lock *, int); void (*lm_break)(struct file_lock *); -- cgit v1.2.3 From 46a1c2c7ae53de2a5676754b54a73c591a3951d2 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Tue, 25 Jun 2013 12:02:13 +0800 Subject: vfs: export lseek_execute() to modules For those file systems(btrfs/ext4/ocfs2/tmpfs) that support SEEK_DATA/SEEK_HOLE functions, we end up handling the similar matter in lseek_execute() to update the current file offset to the desired offset if it is valid, ceph also does the simliar things at ceph_llseek(). To reduce the duplications, this patch make lseek_execute() public accessible so that we can call it directly from the underlying file systems. Thanks Dave Chinner for this suggestion. [AV: call it vfs_setpos(), don't bring the removed 'inode' argument back] v2->v1: - Add kernel-doc comments for lseek_execute() - Call lseek_execute() in ceph->llseek() Signed-off-by: Jie Liu Cc: Dave Chinner Cc: Al Viro Cc: Andi Kleen Cc: Andrew Morton Cc: Christoph Hellwig Cc: Chris Mason Cc: Josef Bacik Cc: Ben Myers Cc: Ted Tso Cc: Hugh Dickins Cc: Mark Fasheh Cc: Joel Becker Cc: Sage Weil Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index a137a73fc1fe..bccb1924ec93 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2426,6 +2426,7 @@ extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); extern loff_t no_llseek(struct file *file, loff_t offset, int whence); +extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); -- cgit v1.2.3 From b45972265f823ed01eae0867a176320071665787 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:05 -0700 Subject: mm: vmscan: take page buffers dirty and locked state into account Page reclaim keeps track of dirty and under writeback pages and uses it to determine if wait_iff_congested() should stall or if kswapd should begin writing back pages. This fails to account for buffer pages that can be under writeback but not PageWriteback which is the case for filesystems like ext3 ordered mode. Furthermore, PageDirty buffer pages can have all the buffers clean and writepage does no IO so it should not be accounted as congested. This patch adds an address_space operation that filesystems may optionally use to check if a page is really dirty or really under writeback. An implementation is provided for for buffer_heads is added and used for block operations and ext3 in ordered mode. By default the page flags are obeyed. Credit goes to Jan Kara for identifying that the page flags alone are not sufficient for ext3 and sanity checking a number of ideas on how the problem could be addressed. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Cc: Zlatko Calusic Cc: dormando Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2b82c8041490..99be011e00de 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -380,6 +380,7 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page)(struct address_space *, struct page *); /* swapfile support */ -- cgit v1.2.3 From 7012b02a2b2c42bb1e1d95040a6e3bb59c7284f7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:22 -0400 Subject: locks: move file_lock_list to a set of percpu hlist_heads and convert file_lock_lock to an lglock The file_lock_list is only used for /proc/locks. The vastly common case is for locks to be put onto the list and come off again, without ever being traversed. Help optimize for this use-case by moving to percpu hlist_head-s. At the same time, we can make the locking less contentious by moving to an lglock. When iterating over the lists for /proc/locks, we must take the global lock and then iterate over each CPU's list in turn. This change necessitates a new fl_link_cpu field to keep track of which CPU the entry is on. On x86_64 at least, this field is placed within an existing hole in the struct to avoid growing the size. Signed-off-by: Jeff Layton Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 99be011e00de..834c9e5113d9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -954,6 +954,7 @@ struct file_lock { unsigned int fl_flags; unsigned char fl_type; unsigned int fl_pid; + int fl_link_cpu; /* what cpu's list is this on? */ struct pid *fl_nspid; wait_queue_head_t fl_wait; struct file *fl_file; -- cgit v1.2.3 From ef277c73ca3b1aade278036ae11640090681d558 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Mon, 8 Jul 2013 16:00:21 -0700 Subject: page migration: fix wrong comment in address_space_operations.migratepage() There is no parameter "sync" in address_space_operations->migratepage(). It should be migrate_mode. And the comment is for MIGRATE_ASYNC. Signed-off-by: Tang Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 99be011e00de..cb771ecc2362 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -372,8 +372,8 @@ struct address_space_operations { int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, unsigned long *); /* - * migrate the contents of a page to the specified target. If sync - * is false, it must not block. + * migrate the contents of a page to the specified target. If + * migrate_mode is MIGRATE_ASYNC, it must not block. */ int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); -- cgit v1.2.3 From 4f5e65a1cc90bbb15b9f6cdc362922af1bcc155a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 8 Jul 2013 14:24:16 -0700 Subject: fput: turn "list_head delayed_fput_list" into llist_head fput() and delayed_fput() can use llist and avoid the locking. This is unlikely path, it is not that this change can improve the performance, but this way the code looks simpler. Signed-off-by: Oleg Nesterov Suggested-by: Andrew Morton Cc: Al Viro Cc: Andrey Vagin Cc: "Eric W. Biederman" Cc: David Howells Cc: Huang Ying Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 834c9e5113d9..d40e8e78bbd1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -768,6 +769,7 @@ struct file { */ union { struct list_head fu_list; + struct llist_node fu_llist; struct rcu_head fu_rcuhead; } f_u; struct path f_path; -- cgit v1.2.3