From 4eea22f01bb4fdba1aab4430c33adbe88d9d4985 Mon Sep 17 00:00:00 2001 From: Mandy Kirkconnell Date: Tue, 14 Mar 2006 13:29:52 +1100 Subject: [XFS] 929045 567344 This mod re-organizes some of the in-core file extent code to prepare for an upcoming mod which will introduce multi-level in-core extent allocations. Although the in-core extent management is using a new code path in this mod, the functionality remains the same. Major changes include: - Introduce 10 new subroutines which re-orgainze the existing code but do NOT change functionality: xfs_iext_get_ext() xfs_iext_insert() xfs_iext_add() xfs_iext_remove() xfs_iext_remove_inline() xfs_iext_remove_direct() xfs_iext_realloc_direct() xfs_iext_direct_to_inline() xfs_iext_inline_to_direct() xfs_iext_destroy() - Remove 2 subroutines (functionality moved to new subroutines above): xfs_iext_realloc() -replaced by xfs_iext_add() and xfs_iext_remove() xfs_bmap_insert_exlist() - replaced by xfs_iext_insert() xfs_bmap_delete_exlist() - replaced by xfs_iext_remove() - Replace all hard-coded (indexed) extent assignments with a call to xfs_iext_get_ext() - Replace all extent record pointer arithmetic (ep++, ep--, base + lastx,..) with calls to xfs_iext_get_ext() - Update comments to remove the idea of a single "extent list" and introduce "extent record" terminology instead SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207390a Signed-off-by: Mandy Kirkconnell Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 470 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 351 insertions(+), 119 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1d7f5a7e063e..6459395a0e40 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -76,16 +76,18 @@ STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); */ STATIC void xfs_validate_extents( - xfs_bmbt_rec_t *ep, + xfs_ifork_t *ifp, int nrecs, int disk, xfs_exntfmt_t fmt) { + xfs_bmbt_rec_t *ep; xfs_bmbt_irec_t irec; xfs_bmbt_rec_t rec; int i; for (i = 0; i < nrecs; i++) { + ep = xfs_iext_get_ext(ifp, i); rec.l0 = get_unaligned((__uint64_t*)&ep->l0); rec.l1 = get_unaligned((__uint64_t*)&ep->l1); if (disk) @@ -94,11 +96,10 @@ xfs_validate_extents( xfs_bmbt_get_all(&rec, &irec); if (fmt == XFS_EXTFMT_NOSTATE) ASSERT(irec.br_state == XFS_EXT_NORM); - ep++; } } #else /* DEBUG */ -#define xfs_validate_extents(ep, nrecs, disk, fmt) +#define xfs_validate_extents(ifp, nrecs, disk, fmt) #endif /* DEBUG */ /* @@ -597,7 +598,6 @@ xfs_iformat_extents( xfs_bmbt_rec_t *ep, *dp; xfs_ifork_t *ifp; int nex; - int real_size; int size; int i; @@ -619,23 +619,20 @@ xfs_iformat_extents( return XFS_ERROR(EFSCORRUPTED); } - real_size = 0; + ifp->if_real_bytes = 0; if (nex == 0) ifp->if_u1.if_extents = NULL; else if (nex <= XFS_INLINE_EXTS) ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - else { - ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); - ASSERT(ifp->if_u1.if_extents != NULL); - real_size = size; - } + else + xfs_iext_add(ifp, 0, nex); + ifp->if_bytes = size; - ifp->if_real_bytes = real_size; if (size) { dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); - xfs_validate_extents(dp, nex, 1, XFS_EXTFMT_INODE(ip)); - ep = ifp->if_u1.if_extents; - for (i = 0; i < nex; i++, ep++, dp++) { + xfs_validate_extents(ifp, nex, 1, XFS_EXTFMT_INODE(ip)); + for (i = 0; i < nex; i++, dp++) { + ep = xfs_iext_get_ext(ifp, i); ep->l0 = INT_GET(get_unaligned((__uint64_t*)&dp->l0), ARCH_CONVERT); ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1), @@ -646,7 +643,7 @@ xfs_iformat_extents( if (whichfork != XFS_DATA_FORK || XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) if (unlikely(xfs_check_nostate_extents( - ifp->if_u1.if_extents, nex))) { + ifp, 0, nex))) { XFS_ERROR_REPORT("xfs_iformat_extents(2)", XFS_ERRLEVEL_LOW, ip->i_mount); @@ -1015,6 +1012,7 @@ xfs_iread_extents( { int error; xfs_ifork_t *ifp; + xfs_extnum_t nextents; size_t size; if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { @@ -1022,26 +1020,24 @@ xfs_iread_extents( ip->i_mount); return XFS_ERROR(EFSCORRUPTED); } - size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t); + nextents = XFS_IFORK_NEXTENTS(ip, whichfork); + size = nextents * sizeof(xfs_bmbt_rec_t); ifp = XFS_IFORK_PTR(ip, whichfork); + /* * We know that the size is valid (it's checked in iformat_btree) */ - ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); - ASSERT(ifp->if_u1.if_extents != NULL); ifp->if_lastex = NULLEXTNUM; - ifp->if_bytes = ifp->if_real_bytes = (int)size; + ifp->if_bytes = ifp->if_real_bytes = 0; ifp->if_flags |= XFS_IFEXTENTS; + xfs_iext_add(ifp, 0, nextents); error = xfs_bmap_read_extents(tp, ip, whichfork); if (error) { - kmem_free(ifp->if_u1.if_extents, size); - ifp->if_u1.if_extents = NULL; - ifp->if_bytes = ifp->if_real_bytes = 0; + xfs_iext_destroy(ifp); ifp->if_flags &= ~XFS_IFEXTENTS; return error; } - xfs_validate_extents((xfs_bmbt_rec_t *)ifp->if_u1.if_extents, - XFS_IFORK_NEXTENTS(ip, whichfork), 0, XFS_EXTFMT_INODE(ip)); + xfs_validate_extents(ifp, nextents, 0, XFS_EXTFMT_INODE(ip)); return 0; } @@ -2475,92 +2471,6 @@ xfs_iroot_realloc( } -/* - * This is called when the amount of space needed for if_extents - * is increased or decreased. The change in size is indicated by - * the number of extents that need to be added or deleted in the - * ext_diff parameter. - * - * If the amount of space needed has decreased below the size of the - * inline buffer, then switch to using the inline buffer. Otherwise, - * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer - * to what is needed. - * - * ip -- the inode whose if_extents area is changing - * ext_diff -- the change in the number of extents, positive or negative, - * requested for the if_extents array. - */ -void -xfs_iext_realloc( - xfs_inode_t *ip, - int ext_diff, - int whichfork) -{ - int byte_diff; - xfs_ifork_t *ifp; - int new_size; - uint rnew_size; - - if (ext_diff == 0) { - return; - } - - ifp = XFS_IFORK_PTR(ip, whichfork); - byte_diff = ext_diff * (uint)sizeof(xfs_bmbt_rec_t); - new_size = (int)ifp->if_bytes + byte_diff; - ASSERT(new_size >= 0); - - if (new_size == 0) { - if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { - ASSERT(ifp->if_real_bytes != 0); - kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); - } - ifp->if_u1.if_extents = NULL; - rnew_size = 0; - } else if (new_size <= sizeof(ifp->if_u2.if_inline_ext)) { - /* - * If the valid extents can fit in if_inline_ext, - * copy them from the malloc'd vector and free it. - */ - if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { - /* - * For now, empty files are format EXTENTS, - * so the if_extents pointer is null. - */ - if (ifp->if_u1.if_extents) { - memcpy(ifp->if_u2.if_inline_ext, - ifp->if_u1.if_extents, new_size); - kmem_free(ifp->if_u1.if_extents, - ifp->if_real_bytes); - } - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - } - rnew_size = 0; - } else { - rnew_size = new_size; - if ((rnew_size & (rnew_size - 1)) != 0) - rnew_size = xfs_iroundup(rnew_size); - /* - * Stuck with malloc/realloc. - */ - if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) { - ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) - kmem_alloc(rnew_size, KM_SLEEP); - memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, - sizeof(ifp->if_u2.if_inline_ext)); - } else if (rnew_size != ifp->if_real_bytes) { - ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) - kmem_realloc(ifp->if_u1.if_extents, - rnew_size, - ifp->if_real_bytes, - KM_NOFS); - } - } - ifp->if_real_bytes = rnew_size; - ifp->if_bytes = new_size; -} - - /* * This is called when the amount of space needed for if_data * is increased or decreased. The change in size is indicated by @@ -2723,9 +2633,7 @@ xfs_idestroy_fork( (ifp->if_u1.if_extents != NULL) && (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) { ASSERT(ifp->if_real_bytes != 0); - kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); - ifp->if_u1.if_extents = NULL; - ifp->if_real_bytes = 0; + xfs_iext_destroy(ifp); } ASSERT(ifp->if_u1.if_extents == NULL || ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); @@ -2902,16 +2810,15 @@ xfs_iextents_copy( * the delayed ones. There must be at least one * non-delayed extent. */ - ep = ifp->if_u1.if_extents; dest_ep = buffer; copied = 0; for (i = 0; i < nrecs; i++) { + ep = xfs_iext_get_ext(ifp, i); start_block = xfs_bmbt_get_startblock(ep); if (ISNULLSTARTBLOCK(start_block)) { /* * It's a delayed allocation extent, so skip it. */ - ep++; continue; } @@ -2921,11 +2828,10 @@ xfs_iextents_copy( put_unaligned(INT_GET(ep->l1, ARCH_CONVERT), (__uint64_t*)&dest_ep->l1); dest_ep++; - ep++; copied++; } ASSERT(copied != 0); - xfs_validate_extents(buffer, copied, 1, XFS_EXTFMT_INODE(ip)); + xfs_validate_extents(ifp, copied, 1, XFS_EXTFMT_INODE(ip)); return (copied * (uint)sizeof(xfs_bmbt_rec_t)); } @@ -2995,8 +2901,10 @@ xfs_iflush_fork( case XFS_DINODE_FMT_EXTENTS: ASSERT((ifp->if_flags & XFS_IFEXTENTS) || !(iip->ili_format.ilf_fields & extflag[whichfork])); - ASSERT((ifp->if_u1.if_extents != NULL) || (ifp->if_bytes == 0)); - ASSERT((ifp->if_u1.if_extents == NULL) || (ifp->if_bytes > 0)); + ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || + (ifp->if_bytes == 0)); + ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || + (ifp->if_bytes > 0)); if ((iip->ili_format.ilf_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); @@ -3704,3 +3612,327 @@ xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); } #endif + +/* + * Return a pointer to the extent record at file index idx. + */ +xfs_bmbt_rec_t * +xfs_iext_get_ext( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx) /* index of target extent */ +{ + ASSERT(idx >= 0); + if (ifp->if_bytes) { + return &ifp->if_u1.if_extents[idx]; + } else { + return NULL; + } +} + +/* + * Insert new item(s) into the extent records for incore inode + * fork 'ifp'. 'count' new items are inserted at index 'idx'. + */ +void +xfs_iext_insert( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* starting index of new items */ + xfs_extnum_t count, /* number of inserted items */ + xfs_bmbt_irec_t *new) /* items to insert */ +{ + xfs_bmbt_rec_t *ep; /* extent record pointer */ + xfs_extnum_t i; /* extent record index */ + + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + xfs_iext_add(ifp, idx, count); + for (i = idx; i < idx + count; i++, new++) { + ep = xfs_iext_get_ext(ifp, i); + xfs_bmbt_set_all(ep, new); + } +} + +/* + * This is called when the amount of space required for incore file + * extents needs to be increased. The ext_diff parameter stores the + * number of new extents being added and the idx parameter contains + * the extent index where the new extents will be added. If the new + * extents are being appended, then we just need to (re)allocate and + * initialize the space. Otherwise, if the new extents are being + * inserted into the middle of the existing entries, a bit more work + * is required to make room for the new extents to be inserted. The + * caller is responsible for filling in the new extent entries upon + * return. + */ +void +xfs_iext_add( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin adding exts */ + int ext_diff) /* nubmer of extents to add */ +{ + int byte_diff; /* new bytes being added */ + int new_size; /* size of extents after adding */ + xfs_extnum_t nextents; /* number of extents in file */ + + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT((idx >= 0) && (idx <= nextents)); + byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); + new_size = ifp->if_bytes + byte_diff; + /* + * If the new number of extents (nextents + ext_diff) + * fits inside the inode, then continue to use the inline + * extent buffer. + */ + if (nextents + ext_diff <= XFS_INLINE_EXTS) { + if (idx < nextents) { + memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], + &ifp->if_u2.if_inline_ext[idx], + (nextents - idx) * sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); + } + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + ifp->if_real_bytes = 0; + } + /* + * Otherwise use a linear (direct) extent list. + * If the extents are currently inside the inode, + * xfs_iext_realloc_direct will switch us from + * inline to direct extent allocation mode. + */ + else { + xfs_iext_realloc_direct(ifp, new_size); + if (idx < nextents) { + memmove(&ifp->if_u1.if_extents[idx + ext_diff], + &ifp->if_u1.if_extents[idx], + (nextents - idx) * sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); + } + } + ifp->if_bytes = new_size; +} + +/* + * This is called when the amount of space required for incore file + * extents needs to be decreased. The ext_diff parameter stores the + * number of extents to be removed and the idx parameter contains + * the extent index where the extents will be removed from. + */ +void +xfs_iext_remove( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int new_size; /* size of extents after removal */ + + ASSERT(ext_diff > 0); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); + + if (new_size == 0) { + xfs_iext_destroy(ifp); + } else if (ifp->if_real_bytes) { + xfs_iext_remove_direct(ifp, idx, ext_diff); + } else { + xfs_iext_remove_inline(ifp, idx, ext_diff); + } + ifp->if_bytes = new_size; +} + +/* + * This removes ext_diff extents from the inline buffer, beginning + * at extent index idx. + */ +void +xfs_iext_remove_inline( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + int nextents; /* number of extents in file */ + + ASSERT(idx < XFS_INLINE_EXTS); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(((nextents - ext_diff) > 0) && + (nextents - ext_diff) < XFS_INLINE_EXTS); + + if (idx + ext_diff < nextents) { + memmove(&ifp->if_u2.if_inline_ext[idx], + &ifp->if_u2.if_inline_ext[idx + ext_diff], + (nextents - (idx + ext_diff)) * + sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], + 0, ext_diff * sizeof(xfs_bmbt_rec_t)); + } else { + memset(&ifp->if_u2.if_inline_ext[idx], 0, + ext_diff * sizeof(xfs_bmbt_rec_t)); + } +} + +/* + * This removes ext_diff extents from a linear (direct) extent list, + * beginning at extent index idx. If the extents are being removed + * from the end of the list (ie. truncate) then we just need to re- + * allocate the list to remove the extra space. Otherwise, if the + * extents are being removed from the middle of the existing extent + * entries, then we first need to move the extent records beginning + * at idx + ext_diff up in the list to overwrite the records being + * removed, then remove the extra space via kmem_realloc. + */ +void +xfs_iext_remove_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int new_size; /* size of extents after removal */ + + new_size = ifp->if_bytes - + (ext_diff * sizeof(xfs_bmbt_rec_t)); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + + if (new_size == 0) { + xfs_iext_destroy(ifp); + return; + } + /* Move extents up in the list (if needed) */ + if (idx + ext_diff < nextents) { + memmove(&ifp->if_u1.if_extents[idx], + &ifp->if_u1.if_extents[idx + ext_diff], + (nextents - (idx + ext_diff)) * + sizeof(xfs_bmbt_rec_t)); + } + memset(&ifp->if_u1.if_extents[nextents - ext_diff], + 0, ext_diff * sizeof(xfs_bmbt_rec_t)); + /* + * Reallocate the direct extent list. If the extents + * will fit inside the inode then xfs_iext_realloc_direct + * will switch from direct to inline extent allocation + * mode for us. + */ + xfs_iext_realloc_direct(ifp, new_size); + ifp->if_bytes = new_size; +} + +/* + * Create, destroy, or resize a linear (direct) block of extents. + */ +void +xfs_iext_realloc_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* new size of extents */ +{ + int rnew_size; /* real new size of extents */ + + rnew_size = new_size; + + /* Free extent records */ + if (new_size == 0) { + xfs_iext_destroy(ifp); + } + /* Resize direct extent list and zero any new bytes */ + else if (ifp->if_real_bytes) { + /* Check if extents will fit inside the inode */ + if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { + xfs_iext_direct_to_inline(ifp, new_size / + (uint)sizeof(xfs_bmbt_rec_t)); + ifp->if_bytes = new_size; + return; + } + if ((new_size & (new_size - 1)) != 0) { + rnew_size = xfs_iroundup(new_size); + } + if (rnew_size != ifp->if_real_bytes) { + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_realloc(ifp->if_u1.if_extents, + rnew_size, + ifp->if_real_bytes, + KM_SLEEP); + } + if (rnew_size > ifp->if_real_bytes) { + memset(&ifp->if_u1.if_extents[ifp->if_bytes / + (uint)sizeof(xfs_bmbt_rec_t)], 0, + rnew_size - ifp->if_real_bytes); + } + } + /* + * Switch from the inline extent buffer to a direct + * extent list. Be sure to include the inline extent + * bytes in new_size. + */ + else { + new_size += ifp->if_bytes; + if ((new_size & (new_size - 1)) != 0) { + rnew_size = xfs_iroundup(new_size); + } + xfs_iext_inline_to_direct(ifp, rnew_size); + } + ifp->if_real_bytes = rnew_size; + ifp->if_bytes = new_size; +} + +/* + * Switch from linear (direct) extent records to inline buffer. + */ +void +xfs_iext_direct_to_inline( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t nextents) /* number of extents in file */ +{ + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ASSERT(nextents <= XFS_INLINE_EXTS); + /* + * The inline buffer was zeroed when we switched + * from inline to direct extent allocation mode, + * so we don't need to clear it here. + */ + memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, + nextents * sizeof(xfs_bmbt_rec_t)); + kmem_free(ifp->if_u1.if_extents, KM_SLEEP); + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + ifp->if_real_bytes = 0; +} + +/* + * Switch from inline buffer to linear (direct) extent records. + * new_size should already be rounded up to the next power of 2 + * by the caller (when appropriate), so use new_size as it is. + * However, since new_size may be rounded up, we can't update + * if_bytes here. It is the caller's responsibility to update + * if_bytes upon return. + */ +void +xfs_iext_inline_to_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* number of extents in file */ +{ + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_alloc(new_size, KM_SLEEP); + memset(ifp->if_u1.if_extents, 0, new_size); + if (ifp->if_bytes) { + memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, + ifp->if_bytes); + memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * + sizeof(xfs_bmbt_rec_t)); + } + ifp->if_real_bytes = new_size; +} + +/* + * Free incore file extents. + */ +void +xfs_iext_destroy( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + if (ifp->if_real_bytes) { + kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); + } else if (ifp->if_bytes) { + memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * + sizeof(xfs_bmbt_rec_t)); + } + ifp->if_u1.if_extents = NULL; + ifp->if_real_bytes = 0; + ifp->if_bytes = 0; +} -- cgit v1.2.3 From 0293ce3a9fd1b34c933a96577a8ba737b681cf75 Mon Sep 17 00:00:00 2001 From: Mandy Kirkconnell Date: Tue, 14 Mar 2006 13:30:23 +1100 Subject: [XFS] 929045 567344 This mod introduces multi-level in-core file extent functionality, building upon the new layout introduced in mod xfs-linux:xfs-kern:207390a. The new multi-level extent allocations are only required for heavily fragmented files, so the old-style linear extent list is used on files until the extents reach a pre-determined size of 4k. 4k buffers are used because this is the system page size on Linux i386 and systems with larger page sizes don't seem to gain much, if anything, by using their native page size as the extent buffer size. Also, using 4k extent buffers everywhere provides a consistent interface for CXFS across different platforms. The 4k extent buffers are managed by an indirection array (xfs_ext_irec_t) which is basically just a pointer array with a bit of extra information to keep track of the number of extents in each buffer as well as the extent offset of each buffer. Major changes include: - Add multi-level in-core file extent functionality to the xfs_iext_ subroutines introduced in mod: xfs-linux:xfs-kern:207390a - Introduce 13 new subroutines which add functionality for multi-level in-core file extents: xfs_iext_add_indirect_multi() xfs_iext_remove_indirect() xfs_iext_realloc_indirect() xfs_iext_indirect_to_direct() xfs_iext_bno_to_irec() xfs_iext_idx_to_irec() xfs_iext_irec_init() xfs_iext_irec_new() xfs_iext_irec_remove() xfs_iext_irec_compact() xfs_iext_irec_compact_pages() xfs_iext_irec_compact_full() xfs_iext_irec_update_extoffs() SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207393a Signed-off-by: Mandy Kirkconnell Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 715 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 710 insertions(+), 5 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6459395a0e40..580fa0758039 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2630,8 +2630,9 @@ xfs_idestroy_fork( ifp->if_real_bytes = 0; } } else if ((ifp->if_flags & XFS_IFEXTENTS) && - (ifp->if_u1.if_extents != NULL) && - (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) { + ((ifp->if_flags & XFS_IFEXTIREC) || + ((ifp->if_u1.if_extents != NULL) && + (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { ASSERT(ifp->if_real_bytes != 0); xfs_iext_destroy(ifp); } @@ -3622,7 +3623,16 @@ xfs_iext_get_ext( xfs_extnum_t idx) /* index of target extent */ { ASSERT(idx >= 0); - if (ifp->if_bytes) { + if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { + return ifp->if_u1.if_ext_irec->er_extbuf; + } else if (ifp->if_flags & XFS_IFEXTIREC) { + xfs_ext_irec_t *erp; /* irec pointer */ + int erp_idx = 0; /* irec index */ + xfs_extnum_t page_idx = idx; /* ext index in target list */ + + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); + return &erp->er_extbuf[page_idx]; + } else if (ifp->if_bytes) { return &ifp->if_u1.if_extents[idx]; } else { return NULL; @@ -3691,6 +3701,7 @@ xfs_iext_add( } ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; ifp->if_real_bytes = 0; + ifp->if_lastex = nextents + ext_diff; } /* * Otherwise use a linear (direct) extent list. @@ -3698,7 +3709,7 @@ xfs_iext_add( * xfs_iext_realloc_direct will switch us from * inline to direct extent allocation mode. */ - else { + else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { xfs_iext_realloc_direct(ifp, new_size); if (idx < nextents) { memmove(&ifp->if_u1.if_extents[idx + ext_diff], @@ -3707,14 +3718,182 @@ xfs_iext_add( memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); } } + /* Indirection array */ + else { + xfs_ext_irec_t *erp; + int erp_idx = 0; + int page_idx = idx; + + ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); + if (ifp->if_flags & XFS_IFEXTIREC) { + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); + } else { + xfs_iext_irec_init(ifp); + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = ifp->if_u1.if_ext_irec; + } + /* Extents fit in target extent page */ + if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { + if (page_idx < erp->er_extcount) { + memmove(&erp->er_extbuf[page_idx + ext_diff], + &erp->er_extbuf[page_idx], + (erp->er_extcount - page_idx) * + sizeof(xfs_bmbt_rec_t)); + memset(&erp->er_extbuf[page_idx], 0, byte_diff); + } + erp->er_extcount += ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + } + /* Insert a new extent page */ + else if (erp) { + xfs_iext_add_indirect_multi(ifp, + erp_idx, page_idx, ext_diff); + } + /* + * If extent(s) are being appended to the last page in + * the indirection array and the new extent(s) don't fit + * in the page, then erp is NULL and erp_idx is set to + * the next index needed in the indirection array. + */ + else { + int count = ext_diff; + + while (count) { + erp = xfs_iext_irec_new(ifp, erp_idx); + erp->er_extcount = count; + count -= MIN(count, (int)XFS_LINEAR_EXTS); + if (count) { + erp_idx++; + } + } + } + } ifp->if_bytes = new_size; } +/* + * This is called when incore extents are being added to the indirection + * array and the new extents do not fit in the target extent list. The + * erp_idx parameter contains the irec index for the target extent list + * in the indirection array, and the idx parameter contains the extent + * index within the list. The number of extents being added is stored + * in the count parameter. + * + * |-------| |-------| + * | | | | idx - number of extents before idx + * | idx | | count | + * | | | | count - number of extents being inserted at idx + * |-------| |-------| + * | count | | nex2 | nex2 - number of extents after idx + count + * |-------| |-------| + */ +void +xfs_iext_add_indirect_multi( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx, /* target extent irec index */ + xfs_extnum_t idx, /* index within target list */ + int count) /* new extents being added */ +{ + int byte_diff; /* new bytes being added */ + xfs_ext_irec_t *erp; /* pointer to irec entry */ + xfs_extnum_t ext_diff; /* number of extents to add */ + xfs_extnum_t ext_cnt; /* new extents still needed */ + xfs_extnum_t nex2; /* extents after idx + count */ + xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ + int nlists; /* number of irec's (lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nex2 = erp->er_extcount - idx; + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + + /* + * Save second part of target extent list + * (all extents past */ + if (nex2) { + byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); + nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); + memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); + erp->er_extcount -= nex2; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); + memset(&erp->er_extbuf[idx], 0, byte_diff); + } + + /* + * Add the new extents to the end of the target + * list, then allocate new irec record(s) and + * extent buffer(s) as needed to store the rest + * of the new extents. + */ + ext_cnt = count; + ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); + if (ext_diff) { + erp->er_extcount += ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + ext_cnt -= ext_diff; + } + while (ext_cnt) { + erp_idx++; + erp = xfs_iext_irec_new(ifp, erp_idx); + ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); + erp->er_extcount = ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + ext_cnt -= ext_diff; + } + + /* Add nex2 extents back to indirection array */ + if (nex2) { + xfs_extnum_t ext_avail; + int i; + + byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); + ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; + i = 0; + /* + * If nex2 extents fit in the current page, append + * nex2_ep after the new extents. + */ + if (nex2 <= ext_avail) { + i = erp->er_extcount; + } + /* + * Otherwise, check if space is available in the + * next page. + */ + else if ((erp_idx < nlists - 1) && + (nex2 <= (ext_avail = XFS_LINEAR_EXTS - + ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { + erp_idx++; + erp++; + /* Create a hole for nex2 extents */ + memmove(&erp->er_extbuf[nex2], erp->er_extbuf, + erp->er_extcount * sizeof(xfs_bmbt_rec_t)); + } + /* + * Final choice, create a new extent page for + * nex2 extents. + */ + else { + erp_idx++; + erp = xfs_iext_irec_new(ifp, erp_idx); + } + memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); + kmem_free(nex2_ep, byte_diff); + erp->er_extcount += nex2; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); + } +} + /* * This is called when the amount of space required for incore file * extents needs to be decreased. The ext_diff parameter stores the * number of extents to be removed and the idx parameter contains * the extent index where the extents will be removed from. + * + * If the amount of space needed has decreased below the linear + * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous + * extent array. Otherwise, use kmem_realloc() to adjust the + * size to what is needed. */ void xfs_iext_remove( @@ -3731,6 +3910,8 @@ xfs_iext_remove( if (new_size == 0) { xfs_iext_destroy(ifp); + } else if (ifp->if_flags & XFS_IFEXTIREC) { + xfs_iext_remove_indirect(ifp, idx, ext_diff); } else if (ifp->if_real_bytes) { xfs_iext_remove_direct(ifp, idx, ext_diff); } else { @@ -3751,6 +3932,7 @@ xfs_iext_remove_inline( { int nextents; /* number of extents in file */ + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); ASSERT(idx < XFS_INLINE_EXTS); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); ASSERT(((nextents - ext_diff) > 0) && @@ -3788,6 +3970,7 @@ xfs_iext_remove_direct( xfs_extnum_t nextents; /* number of extents in file */ int new_size; /* size of extents after removal */ + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); new_size = ifp->if_bytes - (ext_diff * sizeof(xfs_bmbt_rec_t)); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); @@ -3815,6 +3998,84 @@ xfs_iext_remove_direct( ifp->if_bytes = new_size; } +/* + * This is called when incore extents are being removed from the + * indirection array and the extents being removed span multiple extent + * buffers. The idx parameter contains the file extent index where we + * want to begin removing extents, and the count parameter contains + * how many extents need to be removed. + * + * |-------| |-------| + * | nex1 | | | nex1 - number of extents before idx + * |-------| | count | + * | | | | count - number of extents being removed at idx + * | count | |-------| + * | | | nex2 | nex2 - number of extents after idx + count + * |-------| |-------| + */ +void +xfs_iext_remove_indirect( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing extents */ + int count) /* number of extents to remove */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int erp_idx = 0; /* indirection array index */ + xfs_extnum_t ext_cnt; /* extents left to remove */ + xfs_extnum_t ext_diff; /* extents to remove in current list */ + xfs_extnum_t nex1; /* number of extents before idx */ + xfs_extnum_t nex2; /* extents after idx + count */ + int nlists; /* entries in indirecton array */ + int page_idx = idx; /* index in target extent list */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); + ASSERT(erp != NULL); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + nex1 = page_idx; + ext_cnt = count; + while (ext_cnt) { + nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); + ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); + /* + * Check for deletion of entire list; + * xfs_iext_irec_remove() updates extent offsets. + */ + if (ext_diff == erp->er_extcount) { + xfs_iext_irec_remove(ifp, erp_idx); + ext_cnt -= ext_diff; + nex1 = 0; + if (ext_cnt) { + ASSERT(erp_idx < ifp->if_real_bytes / + XFS_IEXT_BUFSZ); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nex1 = 0; + continue; + } else { + break; + } + } + /* Move extents up (if needed) */ + if (nex2) { + memmove(&erp->er_extbuf[nex1], + &erp->er_extbuf[nex1 + ext_diff], + nex2 * sizeof(xfs_bmbt_rec_t)); + } + /* Zero out rest of page */ + memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - + ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); + /* Update remaining counters */ + erp->er_extcount -= ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); + ext_cnt -= ext_diff; + nex1 = 0; + erp_idx++; + erp++; + } + ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); + xfs_iext_irec_compact(ifp); +} + /* * Create, destroy, or resize a linear (direct) block of extents. */ @@ -3827,6 +4088,10 @@ xfs_iext_realloc_direct( rnew_size = new_size; + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || + ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && + (new_size != ifp->if_real_bytes))); + /* Free extent records */ if (new_size == 0) { xfs_iext_destroy(ifp); @@ -3919,6 +4184,60 @@ xfs_iext_inline_to_direct( ifp->if_real_bytes = new_size; } +/* + * Resize an extent indirection array to new_size bytes. + */ +void +xfs_iext_realloc_indirect( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* new indirection array size */ +{ + int nlists; /* number of irec's (ex lists) */ + int size; /* current indirection array size */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + size = nlists * sizeof(xfs_ext_irec_t); + ASSERT(ifp->if_real_bytes); + ASSERT((new_size >= 0) && (new_size != size)); + if (new_size == 0) { + xfs_iext_destroy(ifp); + } else { + ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) + kmem_realloc(ifp->if_u1.if_ext_irec, + new_size, size, KM_SLEEP); + } +} + +/* + * Switch from indirection array to linear (direct) extent allocations. + */ +void +xfs_iext_indirect_to_direct( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_bmbt_rec_t *ep; /* extent record pointer */ + xfs_extnum_t nextents; /* number of extents in file */ + int size; /* size of file extents */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(nextents <= XFS_LINEAR_EXTS); + size = nextents * sizeof(xfs_bmbt_rec_t); + + xfs_iext_irec_compact_full(ifp); + ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); + + ep = ifp->if_u1.if_ext_irec->er_extbuf; + kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t)); + ifp->if_flags &= ~XFS_IFEXTIREC; + ifp->if_u1.if_extents = ep; + ifp->if_bytes = size; + if (nextents < XFS_LINEAR_EXTS) { + xfs_iext_realloc_direct(ifp, size); + } +} + /* * Free incore file extents. */ @@ -3926,7 +4245,16 @@ void xfs_iext_destroy( xfs_ifork_t *ifp) /* inode fork pointer */ { - if (ifp->if_real_bytes) { + if (ifp->if_flags & XFS_IFEXTIREC) { + int erp_idx; + int nlists; + + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { + xfs_iext_irec_remove(ifp, erp_idx); + } + ifp->if_flags &= ~XFS_IFEXTIREC; + } else if (ifp->if_real_bytes) { kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); } else if (ifp->if_bytes) { memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * @@ -3936,3 +4264,380 @@ xfs_iext_destroy( ifp->if_real_bytes = 0; ifp->if_bytes = 0; } + +/* + * Return a pointer to the indirection array entry containing the + * extent record for filesystem block bno. Store the index of the + * target irec in *erp_idxp. + */ +xfs_ext_irec_t * +xfs_iext_bno_to_irec( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number to search for */ + int *erp_idxp) /* irec index of target ext list */ +{ + xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ + xfs_ext_irec_t *erp_next; /* next indirection array entry */ + xfs_extnum_t erp_idx; /* indirection array index */ + int nlists; /* number of extent irec's (lists) */ + int high; /* binary search upper limit */ + int low; /* binary search lower limit */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp_idx = 0; + low = 0; + high = nlists - 1; + while (low <= high) { + erp_idx = (low + high) >> 1; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; + if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { + high = erp_idx - 1; + } else if (erp_next && bno >= + xfs_bmbt_get_startoff(erp_next->er_extbuf)) { + low = erp_idx + 1; + } else { + break; + } + } + *erp_idxp = erp_idx; + return erp; +} + +/* + * Return a pointer to the indirection array entry containing the + * extent record at file extent index *idxp. Store the index of the + * target irec in *erp_idxp and store the page index of the target + * extent record in *idxp. + */ +xfs_ext_irec_t * +xfs_iext_idx_to_irec( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t *idxp, /* extent index (file -> page) */ + int *erp_idxp, /* pointer to target irec */ + int realloc) /* new bytes were just added */ +{ + xfs_ext_irec_t *prev; /* pointer to previous irec */ + xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ + int erp_idx; /* indirection array index */ + int nlists; /* number of irec's (ex lists) */ + int high; /* binary search upper limit */ + int low; /* binary search lower limit */ + xfs_extnum_t page_idx = *idxp; /* extent index in target list */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + ASSERT(page_idx >= 0 && page_idx <= + ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp_idx = 0; + low = 0; + high = nlists - 1; + + /* Binary search extent irec's */ + while (low <= high) { + erp_idx = (low + high) >> 1; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + prev = erp_idx > 0 ? erp - 1 : NULL; + if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && + realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { + high = erp_idx - 1; + } else if (page_idx > erp->er_extoff + erp->er_extcount || + (page_idx == erp->er_extoff + erp->er_extcount && + !realloc)) { + low = erp_idx + 1; + } else if (page_idx == erp->er_extoff + erp->er_extcount && + erp->er_extcount == XFS_LINEAR_EXTS) { + ASSERT(realloc); + page_idx = 0; + erp_idx++; + erp = erp_idx < nlists ? erp + 1 : NULL; + break; + } else { + page_idx -= erp->er_extoff; + break; + } + } + *idxp = page_idx; + *erp_idxp = erp_idx; + return(erp); +} + +/* + * Allocate and initialize an indirection array once the space needed + * for incore extents increases above XFS_IEXT_BUFSZ. + */ +void +xfs_iext_irec_init( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + xfs_extnum_t nextents; /* number of extents in file */ + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(nextents <= XFS_LINEAR_EXTS); + + erp = (xfs_ext_irec_t *) + kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP); + + if (nextents == 0) { + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + } else if (!ifp->if_real_bytes) { + xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); + } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { + xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); + } + erp->er_extbuf = ifp->if_u1.if_extents; + erp->er_extcount = nextents; + erp->er_extoff = 0; + + ifp->if_flags |= XFS_IFEXTIREC; + ifp->if_real_bytes = XFS_IEXT_BUFSZ; + ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); + ifp->if_u1.if_ext_irec = erp; + + return; +} + +/* + * Allocate and initialize a new entry in the indirection array. + */ +xfs_ext_irec_t * +xfs_iext_irec_new( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx) /* index for new irec */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + + /* Resize indirection array */ + xfs_iext_realloc_indirect(ifp, ++nlists * + sizeof(xfs_ext_irec_t)); + /* + * Move records down in the array so the + * new page can use erp_idx. + */ + erp = ifp->if_u1.if_ext_irec; + for (i = nlists - 1; i > erp_idx; i--) { + memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); + } + ASSERT(i == erp_idx); + + /* Initialize new extent record */ + erp = ifp->if_u1.if_ext_irec; + erp[erp_idx].er_extbuf = (xfs_bmbt_rec_t *) + kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; + memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); + erp[erp_idx].er_extcount = 0; + erp[erp_idx].er_extoff = erp_idx > 0 ? + erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; + return (&erp[erp_idx]); +} + +/* + * Remove a record from the indirection array. + */ +void +xfs_iext_irec_remove( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx) /* irec index to remove */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + if (erp->er_extbuf) { + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, + -erp->er_extcount); + kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ); + } + /* Compact extent records */ + erp = ifp->if_u1.if_ext_irec; + for (i = erp_idx; i < nlists - 1; i++) { + memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); + } + /* + * Manually free the last extent record from the indirection + * array. A call to xfs_iext_realloc_indirect() with a size + * of zero would result in a call to xfs_iext_destroy() which + * would in turn call this function again, creating a nasty + * infinite loop. + */ + if (--nlists) { + xfs_iext_realloc_indirect(ifp, + nlists * sizeof(xfs_ext_irec_t)); + } else { + kmem_free(ifp->if_u1.if_ext_irec, + sizeof(xfs_ext_irec_t)); + } + ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; +} + +/* + * This is called to clean up large amounts of unused memory allocated + * by the indirection array. Before compacting anything though, verify + * that the indirection array is still needed and switch back to the + * linear extent list (or even the inline buffer) if possible. The + * compaction policy is as follows: + * + * Full Compaction: Extents fit into a single page (or inline buffer) + * Full Compaction: Extents occupy less than 10% of allocated space + * Partial Compaction: Extents occupy > 10% and < 50% of allocated space + * No Compaction: Extents occupy at least 50% of allocated space + */ +void +xfs_iext_irec_compact( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + + if (nextents == 0) { + xfs_iext_destroy(ifp); + } else if (nextents <= XFS_INLINE_EXTS) { + xfs_iext_indirect_to_direct(ifp); + xfs_iext_direct_to_inline(ifp, nextents); + } else if (nextents <= XFS_LINEAR_EXTS) { + xfs_iext_indirect_to_direct(ifp); + } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { + xfs_iext_irec_compact_full(ifp); + } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { + xfs_iext_irec_compact_pages(ifp); + } +} + +/* + * Combine extents from neighboring extent pages. + */ +void +xfs_iext_irec_compact_pages( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ + int erp_idx = 0; /* indirection array index */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + while (erp_idx < nlists - 1) { + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + erp_next = erp + 1; + if (erp_next->er_extcount <= + (XFS_LINEAR_EXTS - erp->er_extcount)) { + memmove(&erp->er_extbuf[erp->er_extcount], + erp_next->er_extbuf, erp_next->er_extcount * + sizeof(xfs_bmbt_rec_t)); + erp->er_extcount += erp_next->er_extcount; + /* + * Free page before removing extent record + * so er_extoffs don't get modified in + * xfs_iext_irec_remove. + */ + kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ); + erp_next->er_extbuf = NULL; + xfs_iext_irec_remove(ifp, erp_idx + 1); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + } else { + erp_idx++; + } + } +} + +/* + * Fully compact the extent records managed by the indirection array. + */ +void +xfs_iext_irec_compact_full( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_bmbt_rec_t *ep, *ep_next; /* extent record pointers */ + xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ + int erp_idx = 0; /* extent irec index */ + int ext_avail; /* empty entries in ex list */ + int ext_diff; /* number of exts to add */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp = ifp->if_u1.if_ext_irec; + ep = &erp->er_extbuf[erp->er_extcount]; + erp_next = erp + 1; + ep_next = erp_next->er_extbuf; + while (erp_idx < nlists - 1) { + ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; + ext_diff = MIN(ext_avail, erp_next->er_extcount); + memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); + erp->er_extcount += ext_diff; + erp_next->er_extcount -= ext_diff; + /* Remove next page */ + if (erp_next->er_extcount == 0) { + /* + * Free page before removing extent record + * so er_extoffs don't get modified in + * xfs_iext_irec_remove. + */ + kmem_free(erp_next->er_extbuf, + erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); + erp_next->er_extbuf = NULL; + xfs_iext_irec_remove(ifp, erp_idx + 1); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + /* Update next page */ + } else { + /* Move rest of page up to become next new page */ + memmove(erp_next->er_extbuf, ep_next, + erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); + ep_next = erp_next->er_extbuf; + memset(&ep_next[erp_next->er_extcount], 0, + (XFS_LINEAR_EXTS - erp_next->er_extcount) * + sizeof(xfs_bmbt_rec_t)); + } + if (erp->er_extcount == XFS_LINEAR_EXTS) { + erp_idx++; + if (erp_idx < nlists) + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + else + break; + } + ep = &erp->er_extbuf[erp->er_extcount]; + erp_next = erp + 1; + ep_next = erp_next->er_extbuf; + } +} + +/* + * This is called to update the er_extoff field in the indirection + * array when extents have been added or removed from one of the + * extent lists. erp_idx contains the irec index to begin updating + * at and ext_diff contains the number of extents that were added + * or removed. + */ +void +xfs_iext_irec_update_extoffs( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx, /* irec index to update */ + int ext_diff) /* number of new extents */ +{ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + for (i = erp_idx; i < nlists; i++) { + ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; + } +} -- cgit v1.2.3 From 8867bc9bf0aed7181aa72c7c938c6ce830b75166 Mon Sep 17 00:00:00 2001 From: Mandy Kirkconnell Date: Fri, 17 Mar 2006 17:25:04 +1100 Subject: [XFS] There are a few problems with the new xfs_bmap_search_multi_extents() wrapper function that I introduced in mod xfs-linux:xfs-kern:207393a. The function was added as a wrapper around xfs_bmap_do_search_extents() to avoid breaking the top-of-tree CXFS interface. The idea of the function was basically to extract the target extent buffer (if muli- level extent allocation mode), then call xfs_bmap_do_search_extents() with either a pointer to the first extent in the target buffer or a pointer to the first extent in the file, depending on which extent mode was being used. However, in addition to locating the target extent record for block bno, xfs_bmap_do_search_extents() also sets four parameters needed by the caller: *lastx, *eofp, *gotp, *prevp. Passing only the target extent buffer to xfs_bmap_do_search_extents() causes *eofp to be set incorrectly if the extent is at the end of the target list but there are actually more extents in the next er_extbuf. Likewise, if the extent is the first one in the buffer but NOT the first in the file, *prevp is incorrectly set to NULL. Adding the needed functionality to xfs_bmap_search_multi_extents() to re-set any incorrectly set fields is redundant and makes the call to xfs_bmap_do_search_extents() not make much sense when multi-level extent allocation mode is being used. This mod basically extracts the two functional components from xfs_bmap_do_search_extents(), with the intent of obsoleting/removing xfs_bmap_do_search_extents() after the CXFS mult-level in-core extent changes are checked in. The two components are: 1) The binary search to locate the target extent record, and 2) Setting the four parameters needed by the caller (*lastx, *eofp, *gotp, *prevp). Component 1: I created a new function in xfs_inode.c called xfs_iext_bno_to_ext(), which executes the binary search to find the target extent record. xfs_bmap_search_multi_extents() has been modified to call xfs_iext_bno_to_ext() rather than xfs_bmap_do_search_extents(). Component 2: The parameter setting functionality has been added to xfs_bmap_search_multi_extents(), eliminating the need for xfs_bmap_do_search_extents(). These changes make the removal of xfs_bmap_do_search_extents() trival once the CXFS changes are in place. They also allow us to maintain the current XFS interface, using the new search function introduced in mod xfs-linux:xfs-kern:207393a. SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207866a Signed-off-by: Mandy Kirkconnell Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 580fa0758039..a16df2d435fe 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -4265,12 +4265,81 @@ xfs_iext_destroy( ifp->if_bytes = 0; } +/* + * Return a pointer to the extent record for file system block bno. + */ +xfs_bmbt_rec_t * /* pointer to found extent record */ +xfs_iext_bno_to_ext( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number to search for */ + xfs_extnum_t *idxp) /* index of target extent */ +{ + xfs_bmbt_rec_t *base; /* pointer to first extent */ + xfs_filblks_t blockcount = 0; /* number of blocks in extent */ + xfs_bmbt_rec_t *ep = NULL; /* pointer to target extent */ + xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ + int high; /* upper boundry in search */ + xfs_extnum_t idx = 0; /* index of target extent */ + int low; /* lower boundry in search */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_fileoff_t startoff = 0; /* start offset of extent */ + + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *idxp = 0; + return NULL; + } + low = 0; + if (ifp->if_flags & XFS_IFEXTIREC) { + /* Find target extent list */ + int erp_idx = 0; + erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); + base = erp->er_extbuf; + high = erp->er_extcount - 1; + } else { + base = ifp->if_u1.if_extents; + high = nextents - 1; + } + /* Binary search extent records */ + while (low <= high) { + idx = (low + high) >> 1; + ep = base + idx; + startoff = xfs_bmbt_get_startoff(ep); + blockcount = xfs_bmbt_get_blockcount(ep); + if (bno < startoff) { + high = idx - 1; + } else if (bno >= startoff + blockcount) { + low = idx + 1; + } else { + /* Convert back to file-based extent index */ + if (ifp->if_flags & XFS_IFEXTIREC) { + idx += erp->er_extoff; + } + *idxp = idx; + return ep; + } + } + /* Convert back to file-based extent index */ + if (ifp->if_flags & XFS_IFEXTIREC) { + idx += erp->er_extoff; + } + if (bno >= startoff + blockcount) { + if (++idx == nextents) { + ep = NULL; + } else { + ep = xfs_iext_get_ext(ifp, idx); + } + } + *idxp = idx; + return ep; +} + /* * Return a pointer to the indirection array entry containing the * extent record for filesystem block bno. Store the index of the * target irec in *erp_idxp. */ -xfs_ext_irec_t * +xfs_ext_irec_t * /* pointer to found extent record */ xfs_iext_bno_to_irec( xfs_ifork_t *ifp, /* inode fork pointer */ xfs_fileoff_t bno, /* block number to search for */ @@ -4278,7 +4347,7 @@ xfs_iext_bno_to_irec( { xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ xfs_ext_irec_t *erp_next; /* next indirection array entry */ - xfs_extnum_t erp_idx; /* indirection array index */ + int erp_idx; /* indirection array index */ int nlists; /* number of extent irec's (lists) */ int high; /* binary search upper limit */ int low; /* binary search lower limit */ -- cgit v1.2.3 From ec86dc02fdc062d0d298814b1e78b482ab38caf7 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:25:36 +1100 Subject: [XFS] Complete transition away from linvfs naming convention, finally. SGI-PV: 947038 SGI-Modid: xfs-linux-melb:xfs-kern:25474a Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a16df2d435fe..86a8451d10fc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2723,7 +2723,7 @@ xfs_iunpin( /* make sync come back and flush this inode */ if (vp) { - struct inode *inode = LINVFS_GET_IP(vp); + struct inode *inode = vn_to_inode(vp); if (!(inode->i_state & I_NEW)) mark_inode_dirty_sync(inode); @@ -3519,7 +3519,7 @@ xfs_iaccess( { int error; mode_t orgmode = mode; - struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); + struct inode *inode = vn_to_inode(XFS_ITOV(ip)); if (mode & S_IWUSR) { umode_t imode = inode->i_mode; -- cgit v1.2.3 From b12dd34298cf0cff9f337f667045335140873039 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:26:04 +1100 Subject: [XFS] Fix an infinite loop issue in bulkstat when a corrupt inode is detected. Thanks to Roger Willcocks. SGI-PV: 951054 SGI-Modid: xfs-linux-melb:xfs-kern:25477a Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 86a8451d10fc..b1e95707e7cf 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -253,7 +253,8 @@ xfs_itobp( xfs_inode_t *ip, xfs_dinode_t **dipp, xfs_buf_t **bpp, - xfs_daddr_t bno) + xfs_daddr_t bno, + uint imap_flags) { xfs_buf_t *bp; int error; @@ -269,10 +270,9 @@ xfs_itobp( * inode on disk. */ imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP); - if (error != 0) { + if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, + XFS_IMAP_LOOKUP | imap_flags))) return error; - } /* * If the inode number maps to a block outside the bounds @@ -336,9 +336,10 @@ xfs_itobp( * (if DEBUG kernel) or the first inode in the buffer, otherwise. */ #ifdef DEBUG - ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; + ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : + (BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog); #else - ni = 1; + ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1; #endif for (i = 0; i < ni; i++) { int di_ok; @@ -868,9 +869,8 @@ xfs_iread( * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will * know that this is a new incore inode. */ - error = xfs_itobp(mp, tp, ip, &dip, &bp, bno); - - if (error != 0) { + error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, 0); + if (error) { kmem_zone_free(xfs_inode_zone, ip); return error; } @@ -1895,7 +1895,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); if (error) { return error; } @@ -2004,7 +2004,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2066,7 +2066,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -3023,8 +3023,8 @@ xfs_iflush( /* * Get the buffer containing the on-disk inode. */ - error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0); - if (error != 0) { + error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); + if (error) { xfs_ifunlock(ip); return error; } -- cgit v1.2.3 From 3b244aa81ecb06859e0c16abf4c26404c1d86591 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:29:25 +1100 Subject: [XFS] endianess annotations for xfs_attr_shortform_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25501a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b1e95707e7cf..7d0ded05a467 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -506,7 +506,7 @@ xfs_iformat( switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) { case XFS_DINODE_FMT_LOCAL: atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); - size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT); + size = be16_to_cpu(atp->hdr.totsize); error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); break; case XFS_DINODE_FMT_EXTENTS: -- cgit v1.2.3 From f1fdc848aab7fb95b32e058b7f06cc07912b3734 Mon Sep 17 00:00:00 2001 From: Yingping Lu Date: Wed, 22 Mar 2006 12:44:15 +1100 Subject: [XFS] Fixing KDB's xrwtrc command, also added the current process id into the trace. SGI-PV: 948300 SGI-Modid: xfs-linux-melb:xfs-kern:208069a Signed-off-by: Yingping Lu Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7d0ded05a467..2424a4777949 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1372,10 +1372,10 @@ xfs_itrunc_trace( (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff), (void*)(unsigned long)(toss_finish & 0xffffffff), (void*)(unsigned long)current_cpu(), - (void*)0, - (void*)0, - (void*)0, - (void*)0); + (void*)(unsigned long)current_pid(), + (void*)NULL, + (void*)NULL, + (void*)NULL); } #else #define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish) -- cgit v1.2.3 From 9fa8046f50bcb88ab9183ee1f22de5adc42bf92a Mon Sep 17 00:00:00 2001 From: Yingping Lu Date: Wed, 22 Mar 2006 12:44:35 +1100 Subject: [XFS] Fixing the error caused by the conflict between DIO Write's conversion and concurrent truncate operations. Use vn_iowait to wait for the completion of any pending DIOs. Since the truncate requires exclusive IOLOCK, so this blocks any further DIO operations since DIO write also needs exclusive IOBLOCK. This serves as a barrier and prevent any potential starvation. SGI-PV: 947420 SGI-Modid: xfs-linux-melb:xfs-kern:208088a Signed-off-by: Yingping Lu Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2424a4777949..8d2b36879f06 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1420,6 +1420,9 @@ xfs_itruncate_start( mp = ip->i_mount; vp = XFS_ITOV(ip); + + vn_iowait(vp); /* wait for the completion of any pending DIOs */ + /* * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers * overlapping the region being removed. We have to use -- cgit v1.2.3 From 38e2299a641d93d029eb559e096648ab75a22be2 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Wed, 22 Mar 2006 12:47:15 +1100 Subject: [XFS] Explain the race closed by the addition of vn_iowait() to the start of xfs_itruncate_start(). SGI-PV: 947420 SGI-Modid: xfs-linux-melb:xfs-kern:25527a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8d2b36879f06..88a517fad07b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1393,6 +1393,16 @@ xfs_itrunc_trace( * calling into the buffer/page cache code and we can't hold the * inode lock when we do so. * + * We need to wait for any direct I/Os in flight to complete before we + * proceed with the truncate. This is needed to prevent the extents + * being read or written by the direct I/Os from being removed while the + * I/O is in flight as there is no other method of synchronising + * direct I/O with the truncate operation. Also, because we hold + * the IOLOCK in exclusive mode, we prevent new direct I/Os from being + * started until the truncate completes and drops the lock. Essentially, + * the vn_iowait() call forms an I/O barrier that provides strict ordering + * between direct I/Os and the truncate operation. + * * The flags parameter can have either the value XFS_ITRUNC_DEFINITE * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used * in the case that the caller is locking things out of order and -- cgit v1.2.3