From 7eaceaccab5f40bbfda044629a6298616aeaed50 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Thu, 10 Mar 2011 08:52:07 +0100
Subject: block: remove per-queue plugging

Code has been converted over to the new explicit on-stack plugging,
and delay users have been converted to use the new API for that.
So lets kill off the old plugging along with aops->sync_page().

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/exofs/inode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs/exofs/inode.c')

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a7555238c41a..82b94c8f5d22 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -795,7 +795,6 @@ const struct address_space_operations exofs_aops = {
 	.direct_IO	= NULL, /* TODO: Should be trivial to do */
 
 	/* With these NULL has special meaning or default is not exported */
-	.sync_page	= NULL,
 	.get_xip_mem	= NULL,
 	.migratepage	= NULL,
 	.launder_page	= NULL,
-- 
cgit v1.2.3


From a8f1418f9e9bd4c487a7b703ff26c5dd5ceb2bf3 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Mon, 22 Nov 2010 18:02:45 +0200
Subject: exofs: Optimize read_4_write

Don't attempt a read passed i_size, just zero the page and be
done with it.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/inode.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'fs/exofs/inode.c')

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a7555238c41a..c8f58a96e597 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -350,8 +350,10 @@ static int readpage_strip(void *data, struct page *page)
 
 		if (!pcol->read_4_write)
 			unlock_page(page);
-		EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
-			     " splitting\n", inode->i_ino, page->index);
+		EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx "
+			     "read_4_write=%d index=0x%lx end_index=0x%lx "
+			     "splitting\n", inode->i_ino, len,
+			     pcol->read_4_write, page->index, end_index);
 
 		return read_exec(pcol);
 	}
@@ -722,11 +724,28 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
 
 	 /* read modify write */
 	if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
+		loff_t i_size = i_size_read(mapping->host);
+		pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+		size_t rlen;
+
+		if (page->index < end_index)
+			rlen = PAGE_CACHE_SIZE;
+		else if (page->index == end_index)
+			rlen = i_size & ~PAGE_CACHE_MASK;
+		else
+			rlen = 0;
+
+		if (!rlen) {
+			clear_highpage(page);
+			SetPageUptodate(page);
+			goto out;
+		}
+
 		ret = _readpage(page, true);
 		if (ret) {
 			/*SetPageError was done by _readpage. Is it ok?*/
 			unlock_page(page);
-			EXOFS_DBGMSG("__readpage_filler failed\n");
+			EXOFS_DBGMSG("__readpage failed\n");
 		}
 	}
 out:
-- 
cgit v1.2.3


From 97178b7b6c84bd14660b89474d27931a1ea65c66 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Thu, 25 Nov 2010 12:47:15 +0200
Subject: exofs: simple fsync race fix

It is incorrect to test inode dirty bits without participating in the inode
writeback protocol. Inode writeback sets I_SYNC and clears I_DIRTY_?, then
writes out the particular bits, then clears I_SYNC when it is done. BTW. it
may not completely write all pages out, so I_DIRTY_PAGES would get set
again.

This is a standard pattern used throughout the kernel's writeback caches
(I_SYNC ~= I_WRITEBACK, if that makes it clearer).

And so it is not possible to determine an inode's dirty status just by
checking I_DIRTY bits. Especially not for the purpose of data integrity
syncs.

Missing the check for these bits means that fsync can complete while
writeback to the inode is underway. Inode writeback functions get this
right, so call into them rather than try to shortcut things by testing
dirty state improperly.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs/exofs/inode.c')

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index c8f58a96e597..fb9d38056103 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1290,7 +1290,8 @@ out:
 
 int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+	/* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */
+	return exofs_update_inode(inode, 1);
 }
 
 /*
-- 
cgit v1.2.3


From 66cd6cad4919f980dd21307d0150ff251762a264 Mon Sep 17 00:00:00 2001
From: "bharrosh@panasas.com" <bharrosh@panasas.com>
Date: Thu, 7 Oct 2010 14:28:18 -0400
Subject: exofs: Override read-ahead to align on stripe_size

* Set all inode->i_mapping->backing_dev_info to point to
  the per super-block sb->s_bdi.

* Calculating a read_ahead that is:
  - preferable 2 stripes long
    (Future patch will add a mount option to override this)
  - Minimum 128K aligned up to stripe-size
  - Caped to maximum-IO-sizes round down to stripe_size.
    (Max sizes are governed by max bio-size that fits in a page
     times number-of-devices)

CC: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/inode.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'fs/exofs/inode.c')

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index fb9d38056103..681b3cb9b4d8 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC =
 		PAGE_SIZE / sizeof(struct page *),
 };
 
+unsigned exofs_max_io_pages(struct exofs_layout *layout,
+			    unsigned expected_pages)
+{
+	unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
+
+	/* TODO: easily support bio chaining */
+	pages =  min_t(unsigned, pages,
+		       layout->group_width * BIO_MAX_PAGES_KMALLOC);
+	return pages;
+}
+
 struct page_collect {
 	struct exofs_sb_info *sbi;
 	struct inode *inode;
@@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol)
 
 static int pcol_try_alloc(struct page_collect *pcol)
 {
-	unsigned pages = min_t(unsigned, pcol->expected_pages,
-			  MAX_PAGES_KMALLOC);
+	unsigned pages;
 
 	if (!pcol->ios) { /* First time allocate io_state */
 		int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol)
 	}
 
 	/* TODO: easily support bio chaining */
-	pages =  min_t(unsigned, pages,
-		       pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
+	pages =  exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
 
 	for (; pages; pages >>= 1) {
 		pcol->pages = kmalloc(pages * sizeof(struct page *),
@@ -1049,6 +1058,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 		memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
 	}
 
+	inode->i_mapping->backing_dev_info = sb->s_bdi;
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &exofs_file_inode_operations;
 		inode->i_fop = &exofs_file_operations;
@@ -1149,6 +1159,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 
 	sbi = sb->s_fs_info;
 
+	inode->i_mapping->backing_dev_info = sb->s_bdi;
 	sb->s_dirt = 1;
 	inode_init_owner(inode, dir, mode);
 	inode->i_ino = sbi->s_nextid++;
-- 
cgit v1.2.3


From 1cea312ad49d9cb964179a784fedb1fcfe396283 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Feb 2011 17:53:25 +0200
Subject: exofs: Write sbi->s_nextid as part of the Create command

Before when creating a new inode, we'd set the sb->s_dirt flag,
and sometime later the system would write out s_nextid as part
of the sb_info. Also on inode sync we would force the sb sync
as well.

Define the s_nextid as a new partition attribute and set it
every time we create a new object.
At mount we read it from it's new place.

We now never set sb->s_dirt anywhere in exofs. write_super
is actually never called. The call to exofs_write_super from
exofs_put_super is also removed because the VFS always calls
->sync_fs before calling ->put_super twice.

To stay backward-and-forward compatible we also write the old
s_nextid in the super_block object at unmount, and support zero
length attribute on mount.

This also fixes a BUG where in layouts when group_width was not
a divisor of EXOFS_SUPER_ID (0x10000) the s_nextid was not read
from the device it was written to. Because of the sliding window
layout trick, and because the read was always done from the 0
device but the write was done via the raid engine that might slide
the device view. Now we read and write through the raid engine.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs/exofs/inode.c')

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 681b3cb9b4d8..0c713cfbebf0 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1102,6 +1102,7 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
 	}
 	return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
 }
+
 /*
  * Callback function from exofs_new_inode().  The important thing is that we
  * set the obj_created flag so that other methods know that the object exists on
@@ -1160,7 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 	sbi = sb->s_fs_info;
 
 	inode->i_mapping->backing_dev_info = sb->s_bdi;
-	sb->s_dirt = 1;
 	inode_init_owner(inode, dir, mode);
 	inode->i_ino = sbi->s_nextid++;
 	inode->i_blkbits = EXOFS_BLKSHIFT;
@@ -1171,6 +1171,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 	spin_unlock(&sbi->s_next_gen_lock);
 	insert_inode_hash(inode);
 
+	exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
+
 	mark_inode_dirty(inode);
 
 	ret = exofs_get_io_state(&sbi->layout, &ios);
-- 
cgit v1.2.3