From 7124fe0a5b619d65b739477b3b55a20bf805b06d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 24 Jun 2010 11:15:33 +1000 Subject: xfs: validate untrusted inode numbers during lookup When we decode a handle or do a bulkstat lookup, we are using an inode number we cannot trust to be valid. If we are deleting inode chunks from disk (default noikeep mode), then we cannot trust the on disk inode buffer for any given inode number to correctly reflect whether the inode has been unlinked as the di_mode nor the generation number may have been updated on disk. This is due to the fact that when we delete an inode chunk, we do not write the clusters back to disk when they are removed - instead we mark them stale to avoid them being written back potentially over the top of something that has been subsequently allocated at that location. The result is that we can have locations of disk that look like they contain valid inodes but in reality do not. Hence we cannot simply convert the inode number to a block number and read the location from disk to determine if the inode is valid or not. As a result, and XFS_IGET_BULKSTAT lookup needs to actually look the inode up in the inode allocation btree to determine if the inode number is valid or not. It should be noted even on ikeep filesystems, there is the possibility that blocks on disk may look like valid inode clusters. e.g. if there are filesystem images hosted on the filesystem. Hence even for ikeep filesystems we really need to validate that the inode number is valid before issuing the inode buffer read. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_ialloc.c | 121 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 43 deletions(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 9d884c127bb9..0c946c8e05da 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1203,6 +1203,63 @@ error0: return error; } +STATIC int +xfs_imap_lookup( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agino_t agino, + xfs_agblock_t agbno, + xfs_agblock_t *chunk_agbno, + xfs_agblock_t *offset_agbno, + int flags) +{ + struct xfs_inobt_rec_incore rec; + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + xfs_agino_t startino; + int error; + int i; + + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " + "xfs_ialloc_read_agi() returned " + "error %d, agno %d", + error, agno); + return error; + } + + /* + * derive and lookup the exact inode record for the given agino. If the + * record cannot be found, then it's an invalid inode number and we + * should abort. + */ + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); + error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i); + if (!error) { + if (i) + error = xfs_inobt_get_rec(cur, &rec, &i); + if (!error && i == 0) + error = EINVAL; + } + + xfs_trans_brelse(tp, agbp); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + if (error) + return error; + + /* for untrusted inodes check it is allocated first */ + if ((flags & XFS_IGET_BULKSTAT) && + (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) + return EINVAL; + + *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); + *offset_agbno = agbno - *chunk_agbno; + return 0; +} + /* * Return the location of the inode in imap, for mapping it into a buffer. */ @@ -1263,6 +1320,23 @@ xfs_imap( return XFS_ERROR(EINVAL); } + blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; + + /* + * For bulkstat and handle lookups, we have an untrusted inode number + * that we have to verify is valid. We cannot do this just by reading + * the inode buffer as it may have been unlinked and removed leaving + * inodes in stale state on disk. Hence we have to do a btree lookup + * in all cases where an untrusted inode number is passed. + */ + if (flags & XFS_IGET_BULKSTAT) { + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); + if (error) + return error; + goto out_map; + } + /* * If the inode cluster size is the same as the blocksize or * smaller we get to the buffer by simple arithmetics. @@ -1277,10 +1351,8 @@ xfs_imap( return 0; } - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; - /* - * If we get a block number passed from bulkstat we can use it to + * If we get a block number passed we can use it to * find the buffer easily. */ if (imap->im_blkno) { @@ -1304,50 +1376,13 @@ xfs_imap( offset_agbno = agbno & mp->m_inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { - xfs_btree_cur_t *cur; /* inode btree cursor */ - xfs_inobt_rec_incore_t chunk_rec; - xfs_buf_t *agbp; /* agi buffer */ - int i; /* temp state */ - - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_ialloc_read_agi() returned " - "error %d, agno %d", - error, agno); - return error; - } - - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_lookup() failed"); - goto error0; - } - - error = xfs_inobt_get_rec(cur, &chunk_rec, &i); - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_get_rec() failed"); - goto error0; - } - if (i == 0) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_get_rec() failed"); -#endif /* DEBUG */ - error = XFS_ERROR(EINVAL); - } - error0: - xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); if (error) return error; - chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino); - offset_agbno = agbno - chunk_agbno; } +out_map: ASSERT(agbno >= chunk_agbno); cluster_agbno = chunk_agbno + ((offset_agbno / blks_per_cluster) * blks_per_cluster); -- cgit v1.2.3 From 1920779e67cbf5ea8afef317777c5bf2b8096188 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 24 Jun 2010 11:15:47 +1000 Subject: xfs: rename XFS_IGET_BULKSTAT to XFS_IGET_UNTRUSTED Inode numbers may come from somewhere external to the filesystem (e.g. file handles, bulkstat information) and so are inherently untrusted. Rename the flag we use for these lookups to make it obvious we are doing a lookup of an untrusted inode number and need to verify it completely before trying to read it from disk. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_ialloc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 0c946c8e05da..d8fd36685eb9 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1251,7 +1251,7 @@ xfs_imap_lookup( return error; /* for untrusted inodes check it is allocated first */ - if ((flags & XFS_IGET_BULKSTAT) && + if ((flags & XFS_IGET_UNTRUSTED) && (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) return EINVAL; @@ -1292,8 +1292,11 @@ xfs_imap( if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || ino != XFS_AGINO_TO_INO(mp, agno, agino)) { #ifdef DEBUG - /* no diagnostics for bulkstat, ino comes from userspace */ - if (flags & XFS_IGET_BULKSTAT) + /* + * Don't output diagnostic information for untrusted inodes + * as they can be invalid without implying corruption. + */ + if (flags & XFS_IGET_UNTRUSTED) return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { xfs_fs_cmn_err(CE_ALERT, mp, @@ -1329,7 +1332,7 @@ xfs_imap( * inodes in stale state on disk. Hence we have to do a btree lookup * in all cases where an untrusted inode number is passed. */ - if (flags & XFS_IGET_BULKSTAT) { + if (flags & XFS_IGET_UNTRUSTED) { error = xfs_imap_lookup(mp, tp, agno, agino, agbno, &chunk_agbno, &offset_agbno, flags); if (error) -- cgit v1.2.3 From 7b6259e7a83647948fa33a736cc832310c8d85aa Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 24 Jun 2010 11:35:17 +1000 Subject: xfs: remove block number from inode lookup code The block number comes from bulkstat based inode lookups to shortcut the mapping calculations. We ar enot able to trust anything from bulkstat, so drop the block number as well so that the correct lookups and mappings are always done. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_ialloc.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index d8fd36685eb9..c7142a064c48 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1354,22 +1354,6 @@ xfs_imap( return 0; } - /* - * If we get a block number passed we can use it to - * find the buffer easily. - */ - if (imap->im_blkno) { - offset = XFS_INO_TO_OFFSET(mp, ino); - ASSERT(offset < mp->m_sb.sb_inopblock); - - cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno); - offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock; - - imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); - imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); - return 0; - } - /* * If the inode chunks are aligned then use simple maths to * find the location. Otherwise we have to do a btree -- cgit v1.2.3 From 288699fecaffa1ef8f75f92020cbb593a772e487 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2010 18:11:15 +1000 Subject: xfs: drop dmapi hooks Dmapi support was never merged upstream, but we still have a lot of hooks bloating XFS for it, all over the fast pathes of the filesystem. This patch drops over 700 lines of dmapi overhead. If we'll ever get HSM support in mainline at least the namespace events can be done much saner in the VFS instead of the individual filesystem, so it's not like this is much help for future work. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/xfs_ialloc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c7142a064c48..ab76d73d54e9 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -25,7 +25,6 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" -- cgit v1.2.3 From 3400777ff03a3cd4fdbc6cb15676fc7e7ceefc00 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2010 18:11:15 +1000 Subject: xfs: remove unneeded #include statements Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/xfs_ialloc.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index ab76d73d54e9..abf80ae1e95b 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -24,13 +24,10 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -- cgit v1.2.3