From d47992f86b307985b3215bcf141d56d1849d71df Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Tue, 21 May 2013 23:17:23 -0400
Subject: mm: change invalidatepage prototype to accept length

Currently there is no way to truncate partial page where the end
truncate point is not at the end of the page. This is because it was not
needed and the functionality was enough for file system truncate
operation to work properly. However more file systems now support punch
hole feature and it can benefit from mm supporting truncating page just
up to the certain point.

Specifically, with this functionality truncate_inode_pages_range() can
be changed so it supports truncating partial page at the end of the
range (currently it will BUG_ON() if 'end' is not at the end of the
page).

This commit changes the invalidatepage() address space operation
prototype to accept range to be invalidated and update all the instances
for it.

We also change the block_invalidatepage() in the same way and actually
make a use of the new length argument implementing range invalidation.

Actual file system implementations will follow except the file systems
where the changes are really simple and should not change the behaviour
in any way .Implementation for truncate_page_range() which will be able
to accept page unaligned ranges will follow as well.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 43db02e9c9fa..9f696014988d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -364,7 +364,7 @@ struct address_space_operations {
 
 	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
 	sector_t (*bmap)(struct address_space *, sector_t);
-	void (*invalidatepage) (struct page *, unsigned long);
+	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, gfp_t);
 	void (*freepage)(struct page *);
 	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-- 
cgit v1.2.3


From 5c0ba4e0762e6dabd14a5c276652e2defec38de7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 May 2013 13:52:59 -0400
Subject: [readdir] introduce iterate_dir() and dir_context

iterate_dir(): new helper, replacing vfs_readdir().

struct dir_context: contains the readdir callback (and will get more stuff
in it), embedded into whatever data that callback wants to deal with;
eventually, we'll be passing it to ->readdir() replacement instead of
(data,filldir) pair.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 65c2be22b601..643e5b6cbaf5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1506,6 +1506,9 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
  * to have different dirent layouts depending on the binary type.
  */
 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
+struct dir_context {
+	filldir_t actor;
+};
 struct block_device_operations;
 
 /* These macros are for out of kernel modules to test that
@@ -2494,6 +2497,7 @@ loff_t inode_get_bytes(struct inode *inode);
 void inode_set_bytes(struct inode *inode, loff_t bytes);
 
 extern int vfs_readdir(struct file *, filldir_t, void *);
+extern int iterate_dir(struct file *, struct dir_context *);
 
 extern int vfs_stat(const char __user *, struct kstat *);
 extern int vfs_lstat(const char __user *, struct kstat *);
-- 
cgit v1.2.3


From bb6f619b3a49f940d7478112500da312d70866eb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 May 2013 18:49:12 -0400
Subject: [readdir] introduce ->iterate(), ctx->pos, dir_emit()

New method - ->iterate(file, ctx).  That's the replacement for ->readdir();
it takes callback from ctx->actor, uses ctx->pos instead of file->f_pos and
calls dir_emit(ctx, ...) instead of filldir(data, ...).  It does *not*
update file->f_pos (or look at it, for that matter); iterate_dir() does the
update.

Note that dir_emit() takes the offset from ctx->pos (and eventually
filldir_t will lose that argument).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 643e5b6cbaf5..b9641ae68da8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1508,7 +1508,15 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
 struct dir_context {
 	filldir_t actor;
+	loff_t pos;
 };
+
+static inline bool dir_emit(struct dir_context *ctx,
+			    const char *name, int namelen,
+			    u64 ino, unsigned type)
+{
+	return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
+}
 struct block_device_operations;
 
 /* These macros are for out of kernel modules to test that
@@ -1525,6 +1533,7 @@ struct file_operations {
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	int (*readdir) (struct file *, void *, filldir_t);
+	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
-- 
cgit v1.2.3


From 5f99f4e79abc64ed9d93a4b0158b21c64ff7f478 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 May 2013 20:23:06 -0400
Subject: [readdir] switch dcache_readdir() users to ->iterate()

new helpers - dir_emit_dot(file, ctx, dentry), dir_emit_dotdot(file, ctx),
dir_emit_dots(file, ctx).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b9641ae68da8..40293a6ce804 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1511,12 +1511,6 @@ struct dir_context {
 	loff_t pos;
 };
 
-static inline bool dir_emit(struct dir_context *ctx,
-			    const char *name, int namelen,
-			    u64 ino, unsigned type)
-{
-	return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
-}
 struct block_device_operations;
 
 /* These macros are for out of kernel modules to test that
@@ -2537,7 +2531,7 @@ extern void iterate_supers_type(struct file_system_type *,
 extern int dcache_dir_open(struct inode *, struct file *);
 extern int dcache_dir_close(struct inode *, struct file *);
 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
-extern int dcache_readdir(struct file *, void *, filldir_t);
+extern int dcache_readdir(struct file *, struct dir_context *);
 extern int simple_setattr(struct dentry *, struct iattr *);
 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int simple_statfs(struct dentry *, struct kstatfs *);
@@ -2701,4 +2695,35 @@ static inline void inode_has_no_xattr(struct inode *inode)
 		inode->i_flags |= S_NOSEC;
 }
 
+static inline bool dir_emit(struct dir_context *ctx,
+			    const char *name, int namelen,
+			    u64 ino, unsigned type)
+{
+	return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
+}
+static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
+{
+	return ctx->actor(ctx, ".", 1, ctx->pos,
+			  file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0;
+}
+static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx)
+{
+	return ctx->actor(ctx, "..", 2, ctx->pos,
+			  parent_ino(file->f_path.dentry), DT_DIR) == 0;
+}
+static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
+{
+	if (ctx->pos == 0) {
+		if (!dir_emit_dot(file, ctx))
+			return false;
+		ctx->pos = 1;
+	}
+	if (ctx->pos == 1) {
+		if (!dir_emit_dotdot(file, ctx))
+			return false;
+		ctx->pos = 2;
+	}
+	return true;
+}
+
 #endif /* _LINUX_FS_H */
-- 
cgit v1.2.3


From 5ded75ec4c576577cae7d556e671d11d0c80c2fc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 May 2013 21:02:48 -0400
Subject: [readdir] convert ext3

new helper: dir_relax(inode).  Call when you are in location that will
_not_ be invalidated by directory modifications (block boundary, in case
of ext*).  Returns whether the directory has survived (dropping i_mutex
allows rmdir to kill the sucker; if it returns false to us, ->iterate()
is obviously done)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 40293a6ce804..aa9770c7e8df 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2725,5 +2725,11 @@ static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
 	}
 	return true;
 }
+static inline bool dir_relax(struct inode *inode)
+{
+	mutex_unlock(&inode->i_mutex);
+	mutex_lock(&inode->i_mutex);
+	return !IS_DEADDIR(inode);
+}
 
 #endif /* _LINUX_FS_H */
-- 
cgit v1.2.3


From 2233f31aade393641f0eaed43a71110e629bb900 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 22 May 2013 21:44:23 -0400
Subject: [readdir] ->readdir() is gone

everything's converted to ->iterate()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa9770c7e8df..237af62976a6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1526,7 +1526,6 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
-	int (*readdir) (struct file *, void *, filldir_t);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
-- 
cgit v1.2.3


From ac6614b76478e68173ccf7ad4e9e98035cc9c21d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 22 May 2013 22:22:04 -0400
Subject: [readdir] constify ->actor

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 237af62976a6..7c30e3a62baf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1507,7 +1507,7 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
  */
 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
 struct dir_context {
-	filldir_t actor;
+	const filldir_t actor;
 	loff_t pos;
 };
 
-- 
cgit v1.2.3


From 60545d0d4610b02e55f65d141c95b18ccf855b6e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 7 Jun 2013 01:20:27 -0400
Subject: [O_TMPFILE] it's still short a few helpers, but infrastructure should
 be OK now...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7c30e3a62baf..dd6615f0fd13 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1580,6 +1580,7 @@ struct inode_operations {
 	int (*atomic_open)(struct inode *, struct dentry *,
 			   struct file *, unsigned open_flag,
 			   umode_t create_mode, int *opened);
+	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
 } ____cacheline_aligned;
 
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
-- 
cgit v1.2.3


From f4e0c30c191f87851c4a53454abb55ee276f4a7e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 11 Jun 2013 08:34:36 +0400
Subject: allow the temp files created by open() to be linked to

O_TMPFILE | O_CREAT => linkat() with AT_SYMLINK_FOLLOW and /proc/self/fd/<n>
as oldpath (i.e. flink()) will create a link
O_TMPFILE | O_CREAT | O_EXCL => ENOENT on attempt to link those guys

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd6615f0fd13..ab11c44b0697 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1744,6 +1744,7 @@ struct super_operations {
 #define I_REFERENCED		(1 << 8)
 #define __I_DIO_WAKEUP		9
 #define I_DIO_WAKEUP		(1 << I_DIO_WAKEUP)
+#define I_LINKABLE		(1 << 10)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 
-- 
cgit v1.2.3


From 0b3fca1fd1499f0f5a7486d494f96538f2b7e5b9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 15 Jun 2013 11:37:47 +0400
Subject: kill find_inode_number()

the only remaining caller (in ncpfs) is guaranteed to return 0 -
we only hit it if we'd just checked that there's no dentry with
such name.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ab11c44b0697..1db01c13ddce 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2311,7 +2311,6 @@ extern struct file * open_exec(const char *);
 /* fs/dcache.c -- generic fs support functions */
 extern int is_subdir(struct dentry *, struct dentry *);
 extern int path_is_under(struct path *, struct path *);
-extern ino_t find_inode_number(struct dentry *, struct qstr *);
 
 #include <linux/err.h>
 
-- 
cgit v1.2.3


From 1bf9d14dff4a2c4de6152c6f751bdaf6896b68bb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 16 Jun 2013 20:27:42 +0400
Subject: new helper: fixed_size_llseek()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1db01c13ddce..803b7fa2520a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2428,6 +2428,8 @@ extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
 extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
 		int whence, loff_t maxsize, loff_t eof);
+extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
+		int whence, loff_t size);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 
-- 
cgit v1.2.3


From 68d70d03f8f5bd10a0e7337210b13f536fd4aeb9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 19 Jun 2013 15:26:04 +0400
Subject: constify rw_verify_area()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803b7fa2520a..68f10204ab29 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1898,7 +1898,6 @@ extern int current_umask(void);
 extern struct kobject *fs_kobj;
 
 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
-extern int rw_verify_area(int, struct file *, loff_t *, size_t);
 
 #define FLOCK_VERIFY_READ  1
 #define FLOCK_VERIFY_WRITE 2
-- 
cgit v1.2.3


From f891a29f46553a384edbaa0f6ac446b1d03bccac Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 21 Jun 2013 08:58:09 -0400
Subject: locks: drop the unused filp argument to posix_unblock_lock

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 68f10204ab29..172303655702 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -994,7 +994,7 @@ extern void locks_release_private(struct file_lock *);
 extern void posix_test_lock(struct file *, struct file_lock *);
 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
 extern int posix_lock_file_wait(struct file *, struct file_lock *);
-extern int posix_unblock_lock(struct file *, struct file_lock *);
+extern int posix_unblock_lock(struct file_lock *);
 extern int vfs_test_lock(struct file *, struct file_lock *);
 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
@@ -1084,8 +1084,7 @@ static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
 	return -ENOLCK;
 }
 
-static inline int posix_unblock_lock(struct file *filp,
-				     struct file_lock *waiter)
+static inline int posix_unblock_lock(struct file_lock *waiter)
 {
 	return -ENOENT;
 }
-- 
cgit v1.2.3


From 1a9e64a7118c5ad13dd5119da18375a5bd45b330 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 21 Jun 2013 08:58:10 -0400
Subject: cifs: use posix_unblock_lock instead of locks_delete_block

commit 66189be74 (CIFS: Fix VFS lock usage for oplocked files) exported
the locks_delete_block symbol. There's already an exported helper
function that provides this capability however, so make cifs use that
instead and turn locks_delete_block back into a static function.

Note that if fl->fl_next == NULL then this lock has already been through
locks_delete_block(), so we should be OK to ignore an ENOENT error here
and simply not retry the lock.

Cc: Pavel Shilovsky <piastryyy@gmail.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 172303655702..6cfc9a29a783 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1006,7 +1006,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **);
 extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
-extern void locks_delete_block(struct file_lock *waiter);
 extern void lock_flocks(void);
 extern void unlock_flocks(void);
 #else /* !CONFIG_FILE_LOCKING */
@@ -1150,10 +1149,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
 	return 1;
 }
 
-static inline void locks_delete_block(struct file_lock *waiter)
-{
-}
-
 static inline void lock_flocks(void)
 {
 }
-- 
cgit v1.2.3


From 1cb360125966cb6cb594e414ea80a0154617b846 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 21 Jun 2013 08:58:12 -0400
Subject: locks: comment cleanups and clarifications

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6cfc9a29a783..ed9fdaaf3223 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -926,6 +926,24 @@ int locks_in_grace(struct net *);
 /* that will die - we need it for nfs_lock_info */
 #include <linux/nfs_fs_i.h>
 
+/*
+ * struct file_lock represents a generic "file lock". It's used to represent
+ * POSIX byte range locks, BSD (flock) locks, and leases. It's important to
+ * note that the same struct is used to represent both a request for a lock and
+ * the lock itself, but the same object is never used for both.
+ *
+ * FIXME: should we create a separate "struct lock_request" to help distinguish
+ * these two uses?
+ *
+ * The i_flock list is ordered by:
+ *
+ * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX
+ * 2) lock owner
+ * 3) lock range start
+ * 4) lock range end
+ *
+ * Obviously, the last two criteria only matter for POSIX locks.
+ */
 struct file_lock {
 	struct file_lock *fl_next;	/* singly linked list for this inode  */
 	struct list_head fl_link;	/* doubly linked list of all locks */
-- 
cgit v1.2.3


From 1c8c601a8c0dc59fe64907dcd9d512a3d181ddc7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 21 Jun 2013 08:58:15 -0400
Subject: locks: protect most of the file_lock handling with i_lock

Having a global lock that protects all of this code is a clear
scalability problem. Instead of doing that, move most of the code to be
protected by the i_lock instead. The exceptions are the global lists
that the ->fl_link sits on, and the ->fl_block list.

->fl_link is what connects these structures to the
global lists, so we must ensure that we hold those locks when iterating
over or updating these lists.

Furthermore, sound deadlock detection requires that we hold the
blocked_list state steady while checking for loops. We also must ensure
that the search and update to the list are atomic.

For the checking and insertion side of the blocked_list, push the
acquisition of the global lock into __posix_lock_file and ensure that
checking and update of the  blocked_list is done without dropping the
lock in between.

On the removal side, when waking up blocked lock waiters, take the
global lock before walking the blocked list and dequeue the waiters from
the global list prior to removal from the fl_block list.

With this, deadlock detection should be race free while we minimize
excessive file_lock_lock thrashing.

Finally, in order to avoid a lock inversion problem when handling
/proc/locks output we must ensure that manipulations of the fl_block
list are also protected by the file_lock_lock.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed9fdaaf3223..24fe998795e1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1024,8 +1024,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **);
 extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
-extern void lock_flocks(void);
-extern void unlock_flocks(void);
 #else /* !CONFIG_FILE_LOCKING */
 static inline int fcntl_getlk(struct file *file, struct flock __user *user)
 {
@@ -1166,15 +1164,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
 {
 	return 1;
 }
-
-static inline void lock_flocks(void)
-{
-}
-
-static inline void unlock_flocks(void)
-{
-}
-
 #endif /* !CONFIG_FILE_LOCKING */
 
 
-- 
cgit v1.2.3


From 139ca04ee572fea6c0c105e88aba3a534efcd7c4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 21 Jun 2013 08:58:17 -0400
Subject: locks: convert fl_link to a hlist_node

Testing has shown that iterating over the blocked_list for deadlock
detection turns out to be a bottleneck. In order to alleviate that,
begin the process of turning it into a hashtable. We start by turning
the fl_link into a hlist_node and the global lists into hlists. A later
patch will do the conversion of the blocked_list to a hashtable.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 24fe998795e1..fab064a3b65f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -946,7 +946,7 @@ int locks_in_grace(struct net *);
  */
 struct file_lock {
 	struct file_lock *fl_next;	/* singly linked list for this inode  */
-	struct list_head fl_link;	/* doubly linked list of all locks */
+	struct hlist_node fl_link;	/* node in global lists */
 	struct list_head fl_block;	/* circular list of blocked processes */
 	fl_owner_t fl_owner;
 	unsigned int fl_flags;
-- 
cgit v1.2.3


From 3999e49364193f7dbbba66e2be655fe91ba1fced Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 21 Jun 2013 08:58:19 -0400
Subject: locks: add a new "lm_owner_key" lock operation

Currently, the hashing that the locking code uses to add these values
to the blocked_hash is simply calculated using fl_owner field. That's
valid in most cases except for server-side lockd, which validates the
owner of a lock based on fl_owner and fl_pid.

In the case where you have a small number of NFS clients doing a lot
of locking between different processes, you could end up with all
the blocked requests sitting in a very small number of hash buckets.

Add a new lm_owner_key operation to the lock_manager_operations that
will generate an unsigned long to use as the key in the hashtable.
That function is only implemented for server-side lockd, and simply
XORs the fl_owner and fl_pid.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index fab064a3b65f..a137a73fc1fe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -908,6 +908,7 @@ struct file_lock_operations {
 
 struct lock_manager_operations {
 	int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
+	unsigned long (*lm_owner_key)(struct file_lock *);
 	void (*lm_notify)(struct file_lock *);	/* unblock callback */
 	int (*lm_grant)(struct file_lock *, struct file_lock *, int);
 	void (*lm_break)(struct file_lock *);
-- 
cgit v1.2.3


From 46a1c2c7ae53de2a5676754b54a73c591a3951d2 Mon Sep 17 00:00:00 2001
From: Jie Liu <jeff.liu@oracle.com>
Date: Tue, 25 Jun 2013 12:02:13 +0800
Subject: vfs: export lseek_execute() to modules

For those file systems(btrfs/ext4/ocfs2/tmpfs) that support
SEEK_DATA/SEEK_HOLE functions, we end up handling the similar
matter in lseek_execute() to update the current file offset
to the desired offset if it is valid, ceph also does the
simliar things at ceph_llseek().

To reduce the duplications, this patch make lseek_execute()
public accessible so that we can call it directly from the
underlying file systems.

Thanks Dave Chinner for this suggestion.

[AV: call it vfs_setpos(), don't bring the removed 'inode' argument back]

v2->v1:
- Add kernel-doc comments for lseek_execute()
- Call lseek_execute() in ceph->llseek()

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chris Mason <chris.mason@fusionio.com>
Cc: Josef Bacik <jbacik@fusionio.com>
Cc: Ben Myers <bpm@sgi.com>
Cc: Ted Tso <tytso@mit.edu>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Sage Weil <sage@inktank.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a137a73fc1fe..bccb1924ec93 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2426,6 +2426,7 @@ extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
 extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
 extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
+extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
 extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
 		int whence, loff_t maxsize, loff_t eof);
-- 
cgit v1.2.3


From b45972265f823ed01eae0867a176320071665787 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 3 Jul 2013 15:02:05 -0700
Subject: mm: vmscan: take page buffers dirty and locked state into account

Page reclaim keeps track of dirty and under writeback pages and uses it
to determine if wait_iff_congested() should stall or if kswapd should
begin writing back pages.  This fails to account for buffer pages that
can be under writeback but not PageWriteback which is the case for
filesystems like ext3 ordered mode.  Furthermore, PageDirty buffer pages
can have all the buffers clean and writepage does no IO so it should not
be accounted as congested.

This patch adds an address_space operation that filesystems may
optionally use to check if a page is really dirty or really under
writeback.  An implementation is provided for for buffer_heads is added
and used for block operations and ext3 in ordered mode.  By default the
page flags are obeyed.

Credit goes to Jan Kara for identifying that the page flags alone are
not sufficient for ext3 and sanity checking a number of ideas on how the
problem could be addressed.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Cc: Zlatko Calusic <zcalusic@bitsync.net>
Cc: dormando <dormando@rydia.net>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2b82c8041490..99be011e00de 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -380,6 +380,7 @@ struct address_space_operations {
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
+	void (*is_dirty_writeback) (struct page *, bool *, bool *);
 	int (*error_remove_page)(struct address_space *, struct page *);
 
 	/* swapfile support */
-- 
cgit v1.2.3


From 7012b02a2b2c42bb1e1d95040a6e3bb59c7284f7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 21 Jun 2013 08:58:22 -0400
Subject: locks: move file_lock_list to a set of percpu hlist_heads and convert
 file_lock_lock to an lglock

The file_lock_list is only used for /proc/locks. The vastly common case
is for locks to be put onto the list and come off again, without ever
being traversed.

Help optimize for this use-case by moving to percpu hlist_head-s. At the
same time, we can make the locking less contentious by moving to an
lglock. When iterating over the lists for /proc/locks, we must take the
global lock and then iterate over each CPU's list in turn.

This change necessitates a new fl_link_cpu field to keep track of which
CPU the entry is on. On x86_64 at least, this field is placed within an
existing hole in the struct to avoid growing the size.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 99be011e00de..834c9e5113d9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -954,6 +954,7 @@ struct file_lock {
 	unsigned int fl_flags;
 	unsigned char fl_type;
 	unsigned int fl_pid;
+	int fl_link_cpu;		/* what cpu's list is this on? */
 	struct pid *fl_nspid;
 	wait_queue_head_t fl_wait;
 	struct file *fl_file;
-- 
cgit v1.2.3


From ef277c73ca3b1aade278036ae11640090681d558 Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Mon, 8 Jul 2013 16:00:21 -0700
Subject: page migration: fix wrong comment in
 address_space_operations.migratepage()

There is no parameter "sync" in address_space_operations->migratepage().
It should be migrate_mode.  And the comment is for MIGRATE_ASYNC.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 99be011e00de..cb771ecc2362 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -372,8 +372,8 @@ struct address_space_operations {
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 						void **, unsigned long *);
 	/*
-	 * migrate the contents of a page to the specified target. If sync
-	 * is false, it must not block.
+	 * migrate the contents of a page to the specified target. If
+	 * migrate_mode is MIGRATE_ASYNC, it must not block.
 	 */
 	int (*migratepage) (struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
-- 
cgit v1.2.3


From 4f5e65a1cc90bbb15b9f6cdc362922af1bcc155a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 8 Jul 2013 14:24:16 -0700
Subject: fput: turn "list_head delayed_fput_list" into llist_head

fput() and delayed_fput() can use llist and avoid the locking.

This is unlikely path, it is not that this change can improve
the performance, but this way the code looks simpler.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 834c9e5113d9..d40e8e78bbd1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,6 +10,7 @@
 #include <linux/stat.h>
 #include <linux/cache.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/radix-tree.h>
 #include <linux/rbtree.h>
 #include <linux/init.h>
@@ -768,6 +769,7 @@ struct file {
 	 */
 	union {
 		struct list_head	fu_list;
+		struct llist_node	fu_llist;
 		struct rcu_head 	fu_rcuhead;
 	} f_u;
 	struct path		f_path;
-- 
cgit v1.2.3