From 1e8968c5b0582392d5f132422f581e3ebc24e627 Mon Sep 17 00:00:00 2001
From: Niels de Vos
Date: Tue, 17 Dec 2013 18:20:16 +0100
Subject: NFS: dprintk() should not print negative fileids and inode numbers

A fileid in NFS is a uint64. There are some occurrences where dprintk()
outputs a signed fileid. This leads to confusion and more difficult to
read debugging (negative fileids matching positive inode numbers).

Signed-off-by: Niels de Vos
CC: Santosh Pradhan
Signed-off-by: Trond Myklebust
---
 fs/nfs/write.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs/nfs/write.c')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c1d548211c31..77a00c66c7d9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1013,10 +1013,10 @@ int nfs_initiate_write(struct rpc_clnt *clnt,
 	NFS_PROTO(inode)->write_setup(data, &msg);
 
 	dprintk("NFS: %5u initiated write call "
-		"(req %s/%lld, %u bytes @ offset %llu)\n",
+		"(req %s/%llu, %u bytes @ offset %llu)\n",
 		data->task.tk_pid,
 		inode->i_sb->s_id,
-		(long long)NFS_FILEID(inode),
+		(unsigned long long)NFS_FILEID(inode),
 		data->args.count,
 		(unsigned long long)data->args.offset);
 
@@ -1606,9 +1606,9 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 		nfs_list_remove_request(req);
 		nfs_clear_page_commit(req->wb_page);
 
-		dprintk("NFS: commit (%s/%lld %d@%lld)",
+		dprintk("NFS: commit (%s/%llu %d@%lld)",
 			req->wb_context->dentry->d_sb->s_id,
-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
 		if (status < 0) {
-- 
cgit v1.2.3
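Why the format-string change matters: a minimal userspace sketch (not part of
the patch, using a made-up fileid value) showing how the same uint64 renders
under %lld versus %llu once the top bit is set:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* hypothetical fileid with the top bit set */
		uint64_t fileid = 0xfedcba9876543210ULL;

		/* prints -81985529216486896: looks nothing like an inode number */
		printf("signed:   %lld\n", (long long)fileid);
		/* prints 18364758544493064720: the value the server actually handed out */
		printf("unsigned: %llu\n", (unsigned long long)fileid);
		return 0;
	}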
From 263b4509ec4d47e0da3e753f85a39ea12d1eff24 Mon Sep 17 00:00:00 2001
From: Scott Mayhew
Date: Fri, 17 Jan 2014 15:12:05 -0500
Subject: nfs: always make sure page is up-to-date before extending a write to cover the entire page
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We should always make sure the cached page is up-to-date when we're
determining whether we can extend a write to cover the full page -- even
if we've received a write delegation from the server.

Commit c7559663 added logic to skip this check if we have a write
delegation, which can lead to data corruption such as the following
scenario if client B receives a write delegation from the NFS server:

Client A:
# echo 123456789 > /mnt/file

Client B:
# echo abcdefghi >> /mnt/file
# cat /mnt/file
0�D0�abcdefghi

Just because we hold a write delegation doesn't mean that we've read in
the entire page contents.

Cc: # v3.11+
Signed-off-by: Scott Mayhew
Signed-off-by: Trond Myklebust
---
 fs/nfs/write.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs/nfs/write.c')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 77a00c66c7d9..a44a87268a6e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -922,19 +922,20 @@ out:
  * extend the write to cover the entire page in order to avoid fragmentation
  * inefficiencies.
  *
- * If the file is opened for synchronous writes or if we have a write delegation
- * from the server then we can just skip the rest of the checks.
+ * If the file is opened for synchronous writes then we can just skip the rest
+ * of the checks.
  */
 static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
 {
 	if (file->f_flags & O_DSYNC)
 		return 0;
+	if (!nfs_write_pageuptodate(page, inode))
+		return 0;
 	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
 		return 1;
-	if (nfs_write_pageuptodate(page, inode) && (inode->i_flock == NULL ||
-			(inode->i_flock->fl_start == 0 &&
+	if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 &&
 			inode->i_flock->fl_end == OFFSET_MAX &&
-			inode->i_flock->fl_type != F_RDLCK)))
+			inode->i_flock->fl_type != F_RDLCK))
 		return 1;
 	return 0;
 }
-- 
cgit v1.2.3
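For reference, nfs_can_extend_write() as it reads once the hunk above is
applied (a reconstruction from the diff, with explanatory comments added for
this listing, not a verbatim copy of the file). The point of the reordering is
that neither a write delegation nor a whole-file write lock can short-circuit
the page-up-to-date check any longer:

	static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
	{
		if (file->f_flags & O_DSYNC)
			return 0;
		/* Never extend over data we have not actually read into the page. */
		if (!nfs_write_pageuptodate(page, inode))
			return 0;
		/* Only now may a write delegation allow the extension... */
		if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
			return 1;
		/* ...or the lack of any posix lock, or a whole-file non-read lock. */
		if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 &&
				inode->i_flock->fl_end == OFFSET_MAX &&
				inode->i_flock->fl_type != F_RDLCK))
			return 1;
		return 0;
	}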
From d529ef83c355f97027ff85298a9709fe06216a66 Mon Sep 17 00:00:00 2001
From: Jeff Layton
Date: Mon, 27 Jan 2014 13:46:15 -0500
Subject: NFS: fix the handling of NFS_INO_INVALID_DATA flag in nfs_revalidate_mapping

There is a possible race in how the nfs_invalidate_mapping function is
handled. Currently, we go and invalidate the pages in the file and then
clear NFS_INO_INVALID_DATA.

The problem is that it's possible for a stale page to creep into the
mapping after the page was invalidated (i.e., via readahead). If another
writer comes along and sets the flag after that happens but before
invalidate_inode_pages2 returns then we could clear the flag without the
cache having been properly invalidated.

So, we must clear the flag first and then invalidate the pages. Doing
this however, opens another race:

It's possible to have two concurrent read() calls that end up in
nfs_revalidate_mapping at the same time. The first one clears the
NFS_INO_INVALID_DATA flag and then goes to call nfs_invalidate_mapping.
Just before calling that though, the other task races in, checks the
flag and finds it cleared. At that point, it trusts that the mapping is
good and gets the lock on the page, allowing the read() to be satisfied
from the cache even though the data is no longer valid.

These effects are easily manifested by running diotest3 from the LTP
test suite on NFS. That program does a series of DIO writes and buffered
reads. The operations are serialized and page-aligned but the existing
code fails the test since it occasionally allows a read to come out of
the cache incorrectly. While mixing direct and buffered I/O isn't
recommended, I believe it's possible to hit this in other ways that just
use buffered I/O, though that situation is much harder to reproduce.

The problem is that the checking/clearing of that flag and the
invalidation of the mapping really need to be atomic. Fix this by
serializing concurrent invalidations with a bitlock.

At the same time, we also need to allow other places that check
NFS_INO_INVALID_DATA to check whether we might be in the middle of
invalidating the file, so fix up a couple of places that do that to look
for the new NFS_INO_INVALIDATING flag.

Doing this requires us to be careful not to set the bitlock
unnecessarily, so this code only does that if it believes it will be
doing an invalidation.

Signed-off-by: Jeff Layton
Signed-off-by: Trond Myklebust
---
 fs/nfs/write.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs/nfs/write.c')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a44a87268a6e..5511a4247190 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -909,9 +909,13 @@ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx)
  */
 static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
 {
+	struct nfs_inode *nfsi = NFS_I(inode);
+
 	if (nfs_have_delegated_attributes(inode))
 		goto out;
-	if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
+	if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
+		return false;
+	if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
 		return false;
 out:
 	return PageUptodate(page) != 0;
-- 
cgit v1.2.3

From 4db72b40fdbc706f8957e9773ae73b1574b8c694 Mon Sep 17 00:00:00 2001
From: Jeff Layton
Date: Tue, 28 Jan 2014 13:47:46 -0500
Subject: nfs: add memory barriers around NFS_INO_INVALID_DATA and NFS_INO_INVALIDATING

If the setting of NFS_INO_INVALIDATING gets reordered to before the
clearing of NFS_INO_INVALID_DATA, then another task may hit a race
window where both appear to be clear, even though the inode's pages are
still in need of invalidation. Fix this by adding the appropriate memory
barriers.

Signed-off-by: Jeff Layton
Signed-off-by: Trond Myklebust
---
 fs/nfs/write.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/nfs/write.c')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5511a4247190..9a3b6a4cd6b9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -915,6 +915,7 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
 		goto out;
 	if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
 		return false;
+	smp_rmb();
 	if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
 		return false;
 out:
 	return PageUptodate(page) != 0;
-- 
cgit v1.2.3
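Taken together, the last two patches set up a reader/writer pairing. The
sketch below shows how the invalidating side (nfs_revalidate_mapping() in
fs/nfs/inode.c, which is outside the fs/nfs/write.c view shown here) is meant
to line up with the checks added to nfs_write_pageuptodate(). It is an
illustration based on the commit messages above, not the actual inode.c code:
the helper name is made up, and i_lock, bit-waiting, tracing and error
handling are omitted.

	/* hypothetical helper sketching the invalidation ordering */
	static void nfs_invalidate_mapping_sketch(struct inode *inode)
	{
		struct nfs_inode *nfsi = NFS_I(inode);

		/* Announce the invalidation before NFS_INO_INVALID_DATA disappears. */
		set_bit(NFS_INO_INVALIDATING, &nfsi->flags);
		smp_wmb();	/* pairs with the smp_rmb() in nfs_write_pageuptodate() */
		nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;

		/* Drop the cached pages; concurrent readers see NFS_INO_INVALIDATING. */
		invalidate_inode_pages2(inode->i_mapping);

		clear_bit(NFS_INO_INVALIDATING, &nfsi->flags);
	}

Without the smp_wmb()/smp_rmb() pair, the store that sets
NFS_INO_INVALIDATING could become visible after the store that clears
NFS_INO_INVALID_DATA, so a concurrent reader could momentarily observe both
as clear and trust a mapping that is still being torn down.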