To gracefully handle the situation where a CoW operation turns a single refcount extent into a lot of tiny ones and we then run out of space when a tree split has to happen, use the per-AG reserved block pool to pre-allocate all the space we'll ever need for a maximal btree. For a 4K block size, this only costs an overhead of 0.3% of available disk space. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- fs/xfs/libxfs/xfs_refcount_btree.c | 184 ++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_refcount_btree.h | 3 + fs/xfs/xfs_fsops.c | 4 + fs/xfs/xfs_mount.c | 10 ++ fs/xfs/xfs_mount.h | 1 fs/xfs/xfs_super.c | 14 +++ 6 files changed, 216 insertions(+) diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index c785433..7f8bdc4 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -33,6 +33,7 @@ #include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_bit.h" +#include "xfs_perag_pool.h" static struct xfs_btree_cur * xfs_refcountbt_dup_cursor( @@ -72,8 +73,32 @@ xfs_refcountbt_alloc_block( int *stat) { struct xfs_alloc_arg args; /* block allocation args */ + struct xfs_perag *pag; + xfs_agblock_t bno; int error; /* error return value */ + /* First try the per-AG reserve pool. */ + pag = xfs_perag_get(cur->bc_mp, cur->bc_private.a.agno); + error = xfs_perag_pool_alloc_block(pag->pagf_refcountbt_pool, + cur->bc_tp, &bno); + xfs_perag_put(pag); + + switch (error) { + case 0: + *stat = 1; + new->s = cpu_to_be32(bno); + return 0; + case -EINVAL: + break; + case -ENOSPC: + error = 0; + /* fall through */ + default: + *stat = 0; + return error; + } + + /* No pool; try a regular allocation. 
*/ memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; @@ -113,9 +138,27 @@ xfs_refcountbt_free_block( { struct xfs_mount *mp = cur->bc_mp; struct xfs_trans *tp = cur->bc_tp; + struct xfs_perag *pag; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); struct xfs_owner_info oinfo; + int error; + /* Try to give it back to the pool. */ + pag = xfs_perag_get(cur->bc_mp, cur->bc_private.a.agno); + error = xfs_perag_pool_free_block(pag->pagf_refcountbt_pool, cur->bc_tp, + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno)); + xfs_perag_put(pag); + + switch (error) { + case 0: + return 0; + case -EINVAL: + break; + default: + return error; + } + + /* Return it to the AG. */ XFS_RMAP_AG_OWNER(&oinfo, XFS_RMAP_OWN_REFC); xfs_bmap_add_free(mp, cur->bc_private.a.flist, fsbno, 1, &oinfo); @@ -390,3 +433,144 @@ xfs_refcountbt_max_btree_size( return xfs_refcountbt_calc_btree_size(mp, mp->m_sb.sb_agblocks); } + +/* Count the blocks in the reference count tree. */ +static int +xfs_refcountbt_count_tree_blocks( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_extlen_t *tree_len) +{ + struct xfs_buf *agfbp; + struct xfs_buf *bp = NULL; + struct xfs_agf *agfp; + struct xfs_btree_block *block = NULL; + int level; + xfs_agblock_t bno; + xfs_fsblock_t fsbno; + __be32 *pp; + int error; + xfs_extlen_t nr_blocks = 0; + + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agfbp); + if (error) + goto out; + agfp = XFS_BUF_TO_AGF(agfbp); + level = be32_to_cpu(agfp->agf_refcount_level); + bno = be32_to_cpu(agfp->agf_refcount_root); + + /* + * Go down the tree until leaf level is reached, following the first + * pointer (leftmost) at each level. 
+ */ + while (level-- > 0) { + fsbno = XFS_AGB_TO_FSB(mp, agno, bno); + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, fsbno), + XFS_FSB_TO_BB(mp, 1), 0, &bp, + &xfs_refcountbt_buf_ops); + if (error) + goto err; + block = XFS_BUF_TO_BLOCK(bp); + if (level == 0) + break; + pp = XFS_REFCOUNT_PTR_ADDR(block, 1, mp->m_refc_mxr[1]); + bno = be32_to_cpu(*pp); + xfs_trans_brelse(NULL, bp); + } + + /* Jog rightward through level zero. */ + while (block) { + nr_blocks++; + bno = be32_to_cpu(block->bb_u.s.bb_rightsib); + if (bno == NULLAGBLOCK) + break; + fsbno = XFS_AGB_TO_FSB(mp, agno, bno); + xfs_trans_brelse(NULL, bp); + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, fsbno), + XFS_FSB_TO_BB(mp, 1), 0, &bp, + &xfs_refcountbt_buf_ops); + if (error) + goto err; + block = XFS_BUF_TO_BLOCK(bp); + } + + if (bp) + xfs_trans_brelse(NULL, bp); + + /* Add in the upper levels of tree. */ + *tree_len = nr_blocks; +err: + xfs_trans_brelse(NULL, agfbp); +out: + return error; +} + +/** + * xfs_refcountbt_alloc_reserve_pool() -- Create reserved block pools for each + * allocation group. + */ +int +xfs_refcountbt_alloc_reserve_pool( + struct xfs_mount *mp) +{ + xfs_agnumber_t agno; + struct xfs_perag *pag; + xfs_extlen_t pool_len; + xfs_extlen_t tree_len; + int error = 0; + int err; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + + pool_len = xfs_refcountbt_max_btree_size(mp); + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + pag = xfs_perag_get(mp, agno); + if (pag->pagf_refcountbt_pool) { + xfs_perag_put(pag); + continue; + } + tree_len = 0; + xfs_refcountbt_count_tree_blocks(mp, agno, &tree_len); + err = xfs_perag_pool_init(mp, agno, + xfs_refc_block(mp), + pool_len, tree_len, + XFS_RMAP_OWN_REFC, + &pag->pagf_refcountbt_pool); + xfs_perag_put(pag); + if (err && !error) + error = err; + } + + return error; +} + +/** + * xfs_refcountbt_free_reserve_pool() -- Free the reference count btree pools. 
+ */ +int +xfs_refcountbt_free_reserve_pool( + struct xfs_mount *mp) +{ + xfs_agnumber_t agno; + struct xfs_perag *pag; + int error = 0; + int err; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + pag = xfs_perag_get(mp, agno); + err = xfs_perag_pool_free(pag->pagf_refcountbt_pool); + pag->pagf_refcountbt_pool = NULL; + xfs_perag_put(pag); + if (err && !error) + error = err; + } + + return error; +} diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 0f55544..93eebda 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -65,4 +65,7 @@ extern int xfs_refcountbt_maxrecs(struct xfs_mount *mp, int blocklen, DECLARE_BTREE_SIZE_FN(refcountbt); extern unsigned int xfs_refcountbt_max_btree_size(struct xfs_mount *mp); +extern int xfs_refcountbt_alloc_reserve_pool(struct xfs_mount *mp); +extern int xfs_refcountbt_free_reserve_pool(struct xfs_mount *mp); + #endif /* __XFS_REFCOUNT_BTREE_H__ */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 920db9d..4158e07 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -41,6 +41,7 @@ #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_filestream.h" +#include "xfs_refcount_btree.h" /* * File system operations @@ -679,6 +680,9 @@ xfs_growfs_data_private( continue; } } + + error = xfs_refcountbt_alloc_reserve_pool(mp); + return saved_error ? saved_error : error; error0: diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 335bcad..29841d6 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -41,6 +41,7 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_sysfs.h" +#include "xfs_refcount_btree.h" static DEFINE_MUTEX(xfs_uuid_table_mutex); @@ -966,6 +967,10 @@ xfs_mountfs( if (error) xfs_warn(mp, "Unable to allocate reserve blocks. 
Continuing without reserve pool."); + error = xfs_refcountbt_alloc_reserve_pool(mp); + if (error) + xfs_err(mp, + "Error %d allocating refcount btree reserve blocks.", error); } return 0; @@ -1007,6 +1012,11 @@ xfs_unmountfs( __uint64_t resblks; int error; + error = xfs_refcountbt_free_reserve_pool(mp); + if (error) + xfs_warn(mp, + "Error %d freeing refcount btree reserve blocks.", error); + cancel_delayed_work_sync(&mp->m_eofblocks_work); xfs_qm_unmount_quotas(mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index caed8d3..75ff130 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -321,6 +321,7 @@ typedef struct xfs_perag { /* reference count */ __uint8_t pagf_refcount_level; + struct xfs_perag_pool *pagf_refcountbt_pool; } xfs_perag_t; extern void xfs_uuid_table_free(void); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ede714b..87f44a2 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -46,6 +46,7 @@ #include "xfs_quota.h" #include "xfs_sysfs.h" #include "xfs_reflink.h" +#include "xfs_refcount_btree.h" #include <linux/namei.h> #include <linux/init.h> @@ -1264,6 +1265,12 @@ xfs_fs_remount( */ xfs_restore_resvblks(mp); xfs_log_work_queue(mp); + + /* Save space for the refcount btree! */ + error = xfs_refcountbt_alloc_reserve_pool(mp); + if (error) + xfs_err(mp, + "Error %d allocating refcount btree reserve blocks.", error); } /* rw -> ro */ @@ -1275,6 +1282,13 @@ xfs_fs_remount( * reserve pool size so that if we get remounted rw, we can * return it to the same size. */ + + /* Save space for the refcount btree! */ + error = xfs_refcountbt_free_reserve_pool(mp); + if (error) + xfs_warn(mp, + "Error %d freeing refcount btree reserve blocks.", error); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs