From 5017e97d52628fb8ae56e434e86ac2e72ddaac2b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:40 +0000 Subject: xfs: rename xfs_get_perag xfs_get_perag is really getting the perag that an inode belongs to based on it's inode number. Convert the use of this function to just get the perag from a provided ag number. Use this new function to obtain the per-ag structure when traversing the per AG inode trees for sync and reclaim. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_inode.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ef77fd88c8e3..bd3d81636d51 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1946,8 +1946,9 @@ xfs_ifree_cluster( xfs_inode_t *ip, **ip_found; xfs_inode_log_item_t *iip; xfs_log_item_t *lip; - xfs_perag_t *pag = xfs_get_perag(mp, inum); + struct xfs_perag *pag; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { blks_per_cluster = 1; ninodes = mp->m_sb.sb_inopblock; @@ -2088,7 +2089,7 @@ xfs_ifree_cluster( } kmem_free(ip_found); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); } /* @@ -2675,7 +2676,7 @@ xfs_iflush_cluster( xfs_buf_t *bp) { xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + struct xfs_perag *pag; unsigned long first_index, mask; unsigned long inodes_per_cluster; int ilist_size; @@ -2686,6 +2687,7 @@ xfs_iflush_cluster( int bufwasdelwri; int i; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); ASSERT(pag->pagi_inodeok); ASSERT(pag->pag_ici_init); -- cgit v1.2.3 From 44b56e0a1aed522a10051645e85d300e10926fd3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:43 +0000 Subject: xfs: convert remaining direct references to m_perag Convert the remaining direct lookups of the per ag structures to use get/put accesses. Ensure that the loops across AGs and prior users of the interface balance gets and puts correctly. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bd3d81636d51..0317b000ab44 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2695,7 +2695,7 @@ xfs_iflush_cluster( ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) - return 0; + goto out_put; mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; @@ -2764,6 +2764,8 @@ xfs_iflush_cluster( out_free: read_unlock(&pag->pag_ici_lock); kmem_free(ilist); +out_put: + xfs_perag_put(pag); return 0; @@ -2807,6 +2809,7 @@ cluster_corrupt_out: */ xfs_iflush_abort(iq); kmem_free(ilist); + xfs_perag_put(pag); return XFS_ERROR(EFSCORRUPTED); } -- cgit v1.2.3 From 0cadda1c5f194f98a05d252ff4385d86d2ed0862 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jan 2010 09:56:44 +0000 Subject: xfs: remove duplicate buffer flags Currently we define aliases for the buffer flags in various namespaces, which only adds confusion. Remove all but the XBF_ flags to clean this up a bit. Note that we still abuse XFS_B_ASYNC/XBF_ASYNC for some non-buffer uses, but I'll clean that up later. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_inode.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0317b000ab44..bbb3bee8e936 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -151,7 +151,7 @@ xfs_imap_to_bp( "an error %d on %s. Returning error.", error, mp->m_fsname); } else { - ASSERT(buf_flags & XFS_BUF_TRYLOCK); + ASSERT(buf_flags & XBF_TRYLOCK); } return error; } @@ -239,7 +239,7 @@ xfs_inotobp( if (error) return error; - error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags); if (error) return error; @@ -285,7 +285,7 @@ xfs_itobp( return error; if (!bp) { - ASSERT(buf_flags & XFS_BUF_TRYLOCK); + ASSERT(buf_flags & XBF_TRYLOCK); ASSERT(tp == NULL); *bpp = NULL; return EAGAIN; @@ -807,7 +807,7 @@ xfs_iread( * Get pointers to the on-disk inode and the buffer containing it. */ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, - XFS_BUF_LOCK, iget_flags); + XBF_LOCK, iget_flags); if (error) return error; dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -1751,7 +1751,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) return error; @@ -1833,7 +1833,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -1895,7 +1895,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2040,7 +2040,7 @@ xfs_ifree_cluster( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); + XBF_LOCK); pre_flushed = 0; lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); @@ -2151,7 +2151,7 @@ xfs_ifree( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK); if (error) return error; @@ -2952,7 +2952,7 @@ xfs_iflush( * Get the buffer containing the on-disk inode. */ error = xfs_itobp(mp, NULL, ip, &dip, &bp, - noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); + noblock ? XBF_TRYLOCK : XBF_LOCK); if (error || !bp) { xfs_ifunlock(ip); return error; -- cgit v1.2.3 From a14a348bff2f99471a28e5928eb6801224c053d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jan 2010 09:56:46 +0000 Subject: xfs: cleanup up xfs_log_force calling conventions Remove the XFS_LOG_FORCE argument which was always set, and the XFS_LOG_URGE define, which was never used. Split xfs_log_force into a two helpers - xfs_log_force which forces the whole log, and xfs_log_force_lsn which forces up to the specified LSN. The underlying implementations already were entirely separate, as were the users. Also re-indent the new _xfs_log_force/_xfs_log_force which previously had a weird coding style. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_inode.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bbb3bee8e936..d0d1b5a05183 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2484,8 +2484,11 @@ __xfs_iunpin_wait( return; /* Give the log a push to start the unpinning I/O */ - xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? - iip->ili_last_lsn : 0, XFS_LOG_FORCE); + if (iip && iip->ili_last_lsn) + xfs_log_force_lsn(ip->i_mount, iip->ili_last_lsn, 0); + else + xfs_log_force(ip->i_mount, 0); + if (wait) wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); } @@ -2970,7 +2973,7 @@ xfs_iflush( * get stuck waiting in the write for too long. */ if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); /* * inode clustering: -- cgit v1.2.3 From 777df5afdb26c71634edd60582be620ff94e87a0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 6 Feb 2010 12:37:26 +1100 Subject: xfs: Make inode reclaim states explicit A.K.A.: don't rely on xfs_iflush() return value in reclaim We have gradually been moving checks out of the reclaim code because they are duplicated in xfs_iflush(). We've had a history of problems in this area, and many of them stem from the overloading of the return values from xfs_iflush() and interaction with inode flush locking to determine if the inode is safe to reclaim. With the desire to move to delayed write flushing of inodes and non-blocking inode tree reclaim walks, the overloading of the return value of xfs_iflush makes it very difficult to determine the correct thing to do next. This patch explicitly re-adds the checks to the inode reclaim code, removing the reliance on the return value of xfs_iflush() to determine what to do next. It also means that we can clearly document all the inode states that reclaim must handle and hence we can easily see that we handled all the necessary cases. This also removes the need for the xfs_inode_clean() check in xfs_iflush() as all callers now check this first (safely). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d0d1b5a05183..8d0666dd170a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2493,7 +2493,7 @@ __xfs_iunpin_wait( wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); } -static inline void +void xfs_iunpin_wait( xfs_inode_t *ip) { @@ -2848,15 +2848,6 @@ xfs_iflush( iip = ip->i_itemp; mp = ip->i_mount; - /* - * If the inode isn't dirty, then just release the inode flush lock and - * do nothing. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - return 0; - } - /* * We can't flush the inode until it is unpinned, so wait for it if we * are allowed to block. We know noone new can pin it, because we are -- cgit v1.2.3 From c854363e80b49dd04a4de18ebc379eb8c8806674 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 6 Feb 2010 12:39:36 +1100 Subject: xfs: Use delayed write for inodes rather than async V2 We currently do background inode flush asynchronously, resulting in inodes being written in whatever order the background writeback issues them. Not only that, there are also blocking and non-blocking asynchronous inode flushes, depending on where the flush comes from. This patch completely removes asynchronous inode writeback. It removes all the strange writeback modes and replaces them with either a synchronous flush or a non-blocking delayed write flush. That is, inode flushes will only issue IO directly if they are synchronous, and background flushing may do nothing if the operation would block (e.g. on a pinned inode or buffer lock). Delayed write flushes will now result in the inode buffer sitting in the delwri queue of the buffer cache to be flushed by either an AIL push or by the xfsbufd timing out the buffer. This will allow accumulation of dirty inode buffers in memory and allow optimisation of inode cluster writeback at the xfsbufd level where we have much greater queue depths than the block layer elevators. We will also get adjacent inode cluster buffer IO merging for free when a later patch in the series allows sorting of the delayed write buffers before dispatch. This effectively means that any inode that is written back by background writeback will be seen as flush locked during AIL pushing, and will result in the buffers being pushed from there. This writeback path is currently non-optimal, but the next patch in the series will fix that problem. A side effect of this delayed write mechanism is that background inode reclaim will no longer directly flush inodes, nor can it wait on the flush lock. The result is that inode reclaim must leave the inode in the reclaimable state until it is clean. Hence attempts to reclaim a dirty inode in the background will simply skip the inode until it is clean and this allows other mechanisms (i.e. xfsbufd) to do more optimal writeback of the dirty buffers. As a result, the inode reclaim code has been rewritten so that it no longer relies on the ambiguous return values of xfs_iflush() to determine whether it is safe to reclaim an inode. Portions of this patch are derived from patches by Christoph Hellwig. Version 2: - cleanup reclaim code as suggested by Christoph - log background reclaim inode flush errors - just pass sync flags to xfs_iflush Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 75 ++++-------------------------------------------------- 1 file changed, 5 insertions(+), 70 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8d0666dd170a..fa31360046d4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2835,8 +2835,6 @@ xfs_iflush( xfs_dinode_t *dip; xfs_mount_t *mp; int error; - int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); - enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; XFS_STATS_INC(xs_iflush_count); @@ -2859,7 +2857,7 @@ xfs_iflush( * in the same cluster are dirty, they will probably write the inode * out for us if they occur after the log force completes. */ - if (noblock && xfs_ipincount(ip)) { + if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { xfs_iunpin_nowait(ip); xfs_ifunlock(ip); return EAGAIN; @@ -2892,61 +2890,11 @@ xfs_iflush( return XFS_ERROR(EIO); } - /* - * Decide how buffer will be flushed out. This is done before - * the call to xfs_iflush_int because this field is zeroed by it. - */ - if (iip != NULL && iip->ili_format.ilf_fields != 0) { - /* - * Flush out the inode buffer according to the directions - * of the caller. In the cases where the caller has given - * us a choice choose the non-delwri case. This is because - * the inode is in the AIL and we need to get it out soon. - */ - switch (flags) { - case XFS_IFLUSH_SYNC: - case XFS_IFLUSH_DELWRI_ELSE_SYNC: - flags = 0; - break; - case XFS_IFLUSH_ASYNC_NOBLOCK: - case XFS_IFLUSH_ASYNC: - case XFS_IFLUSH_DELWRI_ELSE_ASYNC: - flags = INT_ASYNC; - break; - case XFS_IFLUSH_DELWRI: - flags = INT_DELWRI; - break; - default: - ASSERT(0); - flags = 0; - break; - } - } else { - switch (flags) { - case XFS_IFLUSH_DELWRI_ELSE_SYNC: - case XFS_IFLUSH_DELWRI_ELSE_ASYNC: - case XFS_IFLUSH_DELWRI: - flags = INT_DELWRI; - break; - case XFS_IFLUSH_ASYNC_NOBLOCK: - case XFS_IFLUSH_ASYNC: - flags = INT_ASYNC; - break; - case XFS_IFLUSH_SYNC: - flags = 0; - break; - default: - ASSERT(0); - flags = 0; - break; - } - } - /* * Get the buffer containing the on-disk inode. */ error = xfs_itobp(mp, NULL, ip, &dip, &bp, - noblock ? XBF_TRYLOCK : XBF_LOCK); + (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK); if (error || !bp) { xfs_ifunlock(ip); return error; @@ -2974,13 +2922,10 @@ xfs_iflush( if (error) goto cluster_corrupt_out; - if (flags & INT_DELWRI) { - xfs_bdwrite(mp, bp); - } else if (flags & INT_ASYNC) { - error = xfs_bawrite(mp, bp); - } else { + if (flags & SYNC_WAIT) error = xfs_bwrite(mp, bp); - } + else + xfs_bdwrite(mp, bp); return error; corrupt_out: @@ -3015,16 +2960,6 @@ xfs_iflush_int( iip = ip->i_itemp; mp = ip->i_mount; - - /* - * If the inode isn't dirty, then just release the inode - * flush lock and do nothing. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - return 0; - } - /* set *dip = inode's place in the buffer */ dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); -- cgit v1.2.3