From 44b56e0a1aed522a10051645e85d300e10926fd3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:43 +0000 Subject: xfs: convert remaining direct references to m_perag Convert the remaining direct lookups of the per ag structures to use get/put accesses. Ensure that the loops across AGs and prior users of the interface balance gets and puts correctly. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_ialloc.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index cb907ba69c4c..884ee1367f46 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -253,6 +253,7 @@ xfs_ialloc_ag_alloc( xfs_agino_t thisino; /* current inode number, for loop */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ + struct xfs_perag *pag; args.tp = tp; args.mp = tp->t_mountp; @@ -383,7 +384,9 @@ xfs_ialloc_ag_alloc( be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); down_read(&args.mp->m_peraglock); - args.mp->m_perag[agno].pagi_freecount += newlen; + pag = xfs_perag_get(args.mp, agno); + pag->pagi_freecount += newlen; + xfs_perag_put(pag); up_read(&args.mp->m_peraglock); agi->agi_newino = cpu_to_be32(newino); @@ -488,7 +491,7 @@ xfs_ialloc_ag_select( flags = XFS_ALLOC_FLAG_TRYLOCK; down_read(&mp->m_peraglock); for (;;) { - pag = &mp->m_perag[agno]; + pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { agbp = NULL; @@ -527,6 +530,7 @@ xfs_ialloc_ag_select( agbp = NULL; goto nextag; } + xfs_perag_put(pag); up_read(&mp->m_peraglock); return agbp; } @@ -535,6 +539,7 @@ unlock_nextag: if (agbp) xfs_trans_brelse(tp, agbp); nextag: + xfs_perag_put(pag); /* * No point in iterating over the rest, if we're shutting * down. @@ -672,6 +677,7 @@ xfs_dialloc( xfs_agnumber_t tagno; /* testing allocation group number */ xfs_btree_cur_t *tcur; /* temp cursor */ xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ + struct xfs_perag *pag; if (*IO_agbp == NULL) { @@ -772,11 +778,14 @@ nextag: return noroom ? ENOSPC : 0; } down_read(&mp->m_peraglock); - if (mp->m_perag[tagno].pagi_inodeok == 0) { + pag = xfs_perag_get(mp, tagno); + if (pag->pagi_inodeok == 0) { + xfs_perag_put(pag); up_read(&mp->m_peraglock); goto nextag; } error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); + xfs_perag_put(pag); up_read(&mp->m_peraglock); if (error) goto nextag; @@ -790,6 +799,7 @@ nextag: */ agno = tagno; *IO_agbp = NULL; + pag = xfs_perag_get(mp, agno); restart_pagno: cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); @@ -808,7 +818,6 @@ nextag: * If in the same AG as the parent, try to get near the parent. */ if (pagno == agno) { - xfs_perag_t *pag = &mp->m_perag[agno]; int doneleft; /* done, to the left */ int doneright; /* done, to the right */ int searchdistance = 10; @@ -1007,7 +1016,7 @@ alloc_inode: be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); - mp->m_perag[tagno].pagi_freecount--; + pag->pagi_freecount--; up_read(&mp->m_peraglock); error = xfs_check_agi_freecount(cur, agi); @@ -1016,12 +1025,14 @@ alloc_inode: xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + xfs_perag_put(pag); *inop = ino; return 0; error1: xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); error0: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_perag_put(pag); return error; } @@ -1052,6 +1063,7 @@ xfs_difree( xfs_mount_t *mp; /* mount structure for filesystem */ int off; /* offset of inode in inode chunk */ xfs_inobt_rec_incore_t rec; /* btree record */ + struct xfs_perag *pag; mp = tp->t_mountp; @@ -1158,7 +1170,9 @@ xfs_difree( be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); - mp->m_perag[agno].pagi_freecount -= ilen - 1; + pag = xfs_perag_get(mp, agno); + pag->pagi_freecount -= ilen - 1; + xfs_perag_put(pag); up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); @@ -1189,7 +1203,9 @@ xfs_difree( be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); - mp->m_perag[agno].pagi_freecount++; + pag = xfs_perag_get(mp, agno); + pag->pagi_freecount++; + xfs_perag_put(pag); up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } @@ -1379,7 +1395,6 @@ xfs_imap( XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); return XFS_ERROR(EINVAL); } - return 0; } @@ -1523,8 +1538,7 @@ xfs_ialloc_read_agi( return error; agi = XFS_BUF_TO_AGI(*bpp); - pag = &mp->m_perag[agno]; - + pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_count = be32_to_cpu(agi->agi_count); @@ -1537,6 +1551,7 @@ xfs_ialloc_read_agi( */ ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || XFS_FORCED_SHUTDOWN(mp)); + xfs_perag_put(pag); return 0; } -- cgit v1.2.3 From 1c1c6ebcf5284aee4910f3b906ac90c20e510c82 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:44 +0000 Subject: xfs: Replace per-ag array with a radix tree The use of an array for the per-ag structures requires reallocation of the array when growing the filesystem. This requires locking access to the array to avoid use after free situations, and the locking is difficult to get right. To avoid needing to reallocate an array, change the per-ag structures to an allocated object per ag and index them using a tree structure. The AGs are always densely indexed (hence the use of an array), but the number supported is 2^32 and lookups tend to be random and hence indexing needs to scale. A simple choice is a radix tree - it works well with this sort of index. This change also removes another large contiguous allocation from the mount/growfs path in XFS. The growing process now needs to change to only initialise the new AGs required for the extra space, and as such only needs to exclusively lock the tree for inserts. The rest of the code only needs to lock the tree while doing lookups, and hence this will remove all the deadlocks that currently occur on the m_perag_lock as it is now an innermost lock. The lock is also changed to a spinlock from a read/write lock as the hold time is now extremely short. To complete the picture, the per-ag structures will need to be reference counted to ensure that we don't free/modify them while they are still in use. This will be done in subsequent patch. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_ialloc.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 884ee1367f46..52c9d006c0e6 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -383,11 +383,9 @@ xfs_ialloc_ag_alloc( newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); - down_read(&args.mp->m_peraglock); pag = xfs_perag_get(args.mp, agno); pag->pagi_freecount += newlen; xfs_perag_put(pag); - up_read(&args.mp->m_peraglock); agi->agi_newino = cpu_to_be32(newino); /* @@ -489,7 +487,6 @@ xfs_ialloc_ag_select( */ agno = pagno; flags = XFS_ALLOC_FLAG_TRYLOCK; - down_read(&mp->m_peraglock); for (;;) { pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { @@ -531,7 +528,6 @@ xfs_ialloc_ag_select( goto nextag; } xfs_perag_put(pag); - up_read(&mp->m_peraglock); return agbp; } } @@ -544,18 +540,14 @@ nextag: * No point in iterating over the rest, if we're shutting * down. */ - if (XFS_FORCED_SHUTDOWN(mp)) { - up_read(&mp->m_peraglock); + if (XFS_FORCED_SHUTDOWN(mp)) return NULL; - } agno++; if (agno >= agcount) agno = 0; if (agno == pagno) { - if (flags == 0) { - up_read(&mp->m_peraglock); + if (flags == 0) return NULL; - } flags = 0; } } @@ -777,16 +769,13 @@ nextag: *inop = NULLFSINO; return noroom ? ENOSPC : 0; } - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, tagno); if (pag->pagi_inodeok == 0) { xfs_perag_put(pag); - up_read(&mp->m_peraglock); goto nextag; } error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); xfs_perag_put(pag); - up_read(&mp->m_peraglock); if (error) goto nextag; agi = XFS_BUF_TO_AGI(agbp); @@ -1015,9 +1004,7 @@ alloc_inode: goto error0; be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); pag->pagi_freecount--; - up_read(&mp->m_peraglock); error = xfs_check_agi_freecount(cur, agi); if (error) @@ -1100,9 +1087,7 @@ xfs_difree( /* * Get the allocation group header. */ - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { cmn_err(CE_WARN, "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", @@ -1169,11 +1154,9 @@ xfs_difree( be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, agno); pag->pagi_freecount -= ilen - 1; xfs_perag_put(pag); - up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); @@ -1202,11 +1185,9 @@ xfs_difree( */ be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, agno); pag->pagi_freecount++; xfs_perag_put(pag); - up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } @@ -1328,9 +1309,7 @@ xfs_imap( xfs_buf_t *agbp; /* agi buffer */ int i; /* temp state */ - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_ialloc_read_agi() returned " -- cgit v1.2.3 From 0cadda1c5f194f98a05d252ff4385d86d2ed0862 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jan 2010 09:56:44 +0000 Subject: xfs: remove duplicate buffer flags Currently we define aliases for the buffer flags in various namespaces, which only adds confusion. Remove all but the XBF_ flags to clean this up a bit. Note that we still abuse XFS_B_ASYNC/XBF_ASYNC for some non-buffer uses, but I'll clean that up later. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 52c9d006c0e6..9d884c127bb9 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -205,7 +205,7 @@ xfs_ialloc_inode_init( d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); + XBF_LOCK); ASSERT(fbuf); ASSERT(!XFS_BUF_GETERROR(fbuf)); -- cgit v1.2.3