[PATCH 75/76] xfs: preallocate blocks for worst-case refcount btree expansion

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



To gracefully handle the situation where a CoW operation turns a
single refcount extent into a lot of tiny ones and then runs out of
space when a tree split has to happen, use the per-AG reserved block
pool to pre-allocate all the space we'll ever need for a maximal
btree.  For a 4K block size, this only costs an overhead of 0.3% of
available disk space.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 fs/xfs/libxfs/xfs_refcount_btree.c |  184 ++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_refcount_btree.h |    3 +
 fs/xfs/xfs_fsops.c                 |    4 +
 fs/xfs/xfs_mount.c                 |   10 ++
 fs/xfs/xfs_mount.h                 |    1 
 fs/xfs/xfs_super.c                 |   14 +++
 6 files changed, 216 insertions(+)


diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index c785433..7f8bdc4 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -33,6 +33,7 @@
 #include "xfs_cksum.h"
 #include "xfs_trans.h"
 #include "xfs_bit.h"
+#include "xfs_perag_pool.h"
 
 static struct xfs_btree_cur *
 xfs_refcountbt_dup_cursor(
@@ -72,8 +73,32 @@ xfs_refcountbt_alloc_block(
 	int			*stat)
 {
 	struct xfs_alloc_arg	args;		/* block allocation args */
+	struct xfs_perag	*pag;
+	xfs_agblock_t		bno;
 	int			error;		/* error return value */
 
+	/* First try the per-AG reserve pool. */
+	pag = xfs_perag_get(cur->bc_mp, cur->bc_private.a.agno);
+	error = xfs_perag_pool_alloc_block(pag->pagf_refcountbt_pool,
+			cur->bc_tp, &bno);
+	xfs_perag_put(pag);
+
+	switch (error) {
+	case 0:
+		*stat = 1;
+		new->s = cpu_to_be32(bno);
+		return 0;
+	case -EINVAL:
+		break;
+	case -ENOSPC:
+		error = 0;
+		/* fall through */
+	default:
+		*stat = 0;
+		return error;
+	}
+
+	/* No pool; try a regular allocation. */
 	memset(&args, 0, sizeof(args));
 	args.tp = cur->bc_tp;
 	args.mp = cur->bc_mp;
@@ -113,9 +138,27 @@ xfs_refcountbt_free_block(
 {
 	struct xfs_mount	*mp = cur->bc_mp;
 	struct xfs_trans	*tp = cur->bc_tp;
+	struct xfs_perag	*pag;
 	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
 	struct xfs_owner_info	oinfo;
+	int			error;
 
+	/* Try to give it back to the pool. */
+	pag = xfs_perag_get(cur->bc_mp, cur->bc_private.a.agno);
+	error = xfs_perag_pool_free_block(pag->pagf_refcountbt_pool, cur->bc_tp,
+			XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno));
+	xfs_perag_put(pag);
+
+	switch (error) {
+	case 0:
+		return 0;
+	case -EINVAL:
+		break;
+	default:
+		return error;
+	}
+
+	/* Return it to the AG. */
 	XFS_RMAP_AG_OWNER(&oinfo, XFS_RMAP_OWN_REFC);
 	xfs_bmap_add_free(mp, cur->bc_private.a.flist, fsbno, 1,
 			&oinfo);
@@ -390,3 +433,144 @@ xfs_refcountbt_max_btree_size(
 
 	return xfs_refcountbt_calc_btree_size(mp, mp->m_sb.sb_agblocks);
 }
+
+/*
+ * Count every block in the reference count btree of one AG, interior nodes
+ * as well as leaves.  On success the total is stored in *tree_len and zero
+ * is returned.  If the AGF or a btree block cannot be read, that error is
+ * returned and *tree_len is left untouched.
+ */
+static int
+xfs_refcountbt_count_tree_blocks(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_extlen_t		*tree_len)
+{
+	struct xfs_buf		*agfbp;
+	struct xfs_buf		*bp;
+	struct xfs_agf		*agfp;
+	struct xfs_btree_block	*block;
+	int			level;
+	xfs_agblock_t		bno;
+	xfs_agblock_t		next_bno;
+	xfs_fsblock_t		fsbno;
+	__be32			*pp;
+	int			error;
+	xfs_extlen_t		nr_blocks = 0;
+
+	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agfbp);
+	if (error)
+		goto out;
+	/* The AGF supplies the root block and level count of the btree. */
+	agfp = XFS_BUF_TO_AGF(agfbp);
+	level = be32_to_cpu(agfp->agf_refcount_level);
+	bno = be32_to_cpu(agfp->agf_refcount_root);
+
+	/*
+	 * Visit the tree one level at a time, root first.  Within a level,
+	 * start at the leftmost block and follow the right sibling pointers,
+	 * counting each block.  The first pointer of the leftmost block of an
+	 * interior level names the leftmost block of the level below it.
+	 */
+	while (level-- > 0) {
+		next_bno = NULLAGBLOCK;
+		while (bno != NULLAGBLOCK) {
+			fsbno = XFS_AGB_TO_FSB(mp, agno, bno);
+			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+					XFS_FSB_TO_DADDR(mp, fsbno),
+					XFS_FSB_TO_BB(mp, 1), 0, &bp,
+					&xfs_refcountbt_buf_ops);
+			if (error)
+				goto err;
+			block = XFS_BUF_TO_BLOCK(bp);
+			nr_blocks++;
+
+			/* Only the leftmost interior block leads downward. */
+			if (level > 0 && next_bno == NULLAGBLOCK) {
+				pp = XFS_REFCOUNT_PTR_ADDR(block, 1,
+						mp->m_refc_mxr[1]);
+				next_bno = be32_to_cpu(*pp);
+			}
+
+			/* Read the sibling before the buffer is released. */
+			bno = be32_to_cpu(block->bb_u.s.bb_rightsib);
+			xfs_trans_brelse(NULL, bp);
+		}
+		bno = next_bno;
+	}
+
+	*tree_len = nr_blocks;
+err:
+	/* Success falls through with error == 0; either way drop the AGF. */
+	xfs_trans_brelse(NULL, agfbp);
+out:
+	return error;
+}
+
+/**
+ * xfs_refcountbt_alloc_reserve_pool() -- Create reserved block pools for each
+ *	allocation group that lacks one; a no-op without reflink.  Every AG
+ *	is tried; the first error seen is returned, or zero if there was none.
+ */
+int
+xfs_refcountbt_alloc_reserve_pool(
+	struct xfs_mount	*mp)
+{
+	xfs_agnumber_t		agno;
+	struct xfs_perag	*pag;
+	xfs_extlen_t		pool_len;
+	xfs_extlen_t		tree_len = 0;
+	int			error = 0;
+	int			err;
+
+	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+		return 0;
+
+	pool_len = xfs_refcountbt_max_btree_size(mp);
+
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		pag = xfs_perag_get(mp, agno);
+		if (pag->pagf_refcountbt_pool) {
+			xfs_perag_put(pag);
+			continue;
+		}
+		/* If the tree cannot be counted, leave this AG without a pool. */
+		err = xfs_refcountbt_count_tree_blocks(mp, agno, &tree_len);
+		if (!err)
+			err = xfs_perag_pool_init(mp, agno, xfs_refc_block(mp),
+					pool_len, tree_len, XFS_RMAP_OWN_REFC,
+					&pag->pagf_refcountbt_pool);
+		xfs_perag_put(pag);
+		if (err && !error)
+			error = err;
+	}
+
+	return error;
+}
+
+/**
+ * xfs_refcountbt_free_reserve_pool() -- Release the refcount btree pool of
+ *	every AG and clear its perag pointer.  Returns the first error, or 0.
+ */
+int
+xfs_refcountbt_free_reserve_pool(
+	struct xfs_mount	*mp)
+{
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno = 0;
+	int			first_error = 0;
+	int			ret;
+
+	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+		return 0;
+
+	while (agno < mp->m_sb.sb_agcount) {
+		pag = xfs_perag_get(mp, agno++);
+		ret = xfs_perag_pool_free(pag->pagf_refcountbt_pool);
+		pag->pagf_refcountbt_pool = NULL;
+		xfs_perag_put(pag);
+		/* Keep going after a failure, but remember the first one. */
+		if (!first_error)
+			first_error = ret;
+	}
+	return first_error;
+}
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h
index 0f55544..93eebda 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.h
+++ b/fs/xfs/libxfs/xfs_refcount_btree.h
@@ -65,4 +65,7 @@ extern int xfs_refcountbt_maxrecs(struct xfs_mount *mp, int blocklen,
 DECLARE_BTREE_SIZE_FN(refcountbt);
 extern unsigned int xfs_refcountbt_max_btree_size(struct xfs_mount *mp);
 
+extern int xfs_refcountbt_alloc_reserve_pool(struct xfs_mount *mp);
+extern int xfs_refcountbt_free_reserve_pool(struct xfs_mount *mp);
+
 #endif	/* __XFS_REFCOUNT_BTREE_H__ */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 920db9d..4158e07 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -41,6 +41,7 @@
 #include "xfs_trace.h"
 #include "xfs_log.h"
 #include "xfs_filestream.h"
+#include "xfs_refcount_btree.h"
 
 /*
  * File system operations
@@ -679,6 +680,9 @@ xfs_growfs_data_private(
 			continue;
 		}
 	}
+
+	error = xfs_refcountbt_alloc_reserve_pool(mp);
+
 	return saved_error ? saved_error : error;
 
  error0:
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 335bcad..29841d6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -41,6 +41,7 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_sysfs.h"
+#include "xfs_refcount_btree.h"
 
 
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -966,6 +967,10 @@ xfs_mountfs(
 		if (error)
 			xfs_warn(mp,
 	"Unable to allocate reserve blocks. Continuing without reserve pool.");
+		error = xfs_refcountbt_alloc_reserve_pool(mp);
+		if (error)
+			xfs_err(mp,
+	"Error %d allocating refcount btree reserve blocks.", error);
 	}
 
 	return 0;
@@ -1007,6 +1012,11 @@ xfs_unmountfs(
 	__uint64_t		resblks;
 	int			error;
 
+	error = xfs_refcountbt_free_reserve_pool(mp);
+	if (error)
+		xfs_warn(mp,
+	"Error %d freeing refcount btree reserve blocks.", error);
+
 	cancel_delayed_work_sync(&mp->m_eofblocks_work);
 
 	xfs_qm_unmount_quotas(mp);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index caed8d3..75ff130 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -321,6 +321,7 @@ typedef struct xfs_perag {
 
 	/* reference count */
 	__uint8_t	pagf_refcount_level;
+	struct xfs_perag_pool	*pagf_refcountbt_pool;
 } xfs_perag_t;
 
 extern void	xfs_uuid_table_free(void);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ede714b..87f44a2 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -46,6 +46,7 @@
 #include "xfs_quota.h"
 #include "xfs_sysfs.h"
 #include "xfs_reflink.h"
+#include "xfs_refcount_btree.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -1264,6 +1265,12 @@ xfs_fs_remount(
 		 */
 		xfs_restore_resvblks(mp);
 		xfs_log_work_queue(mp);
+
+		/* Save space for the refcount btree! */
+		error = xfs_refcountbt_alloc_reserve_pool(mp);
+		if (error)
+			xfs_err(mp,
+	"Error %d allocating refcount btree reserve blocks.", error);
 	}
 
 	/* rw -> ro */
@@ -1275,6 +1282,13 @@ xfs_fs_remount(
 		 * reserve pool size so that if we get remounted rw, we can
 		 * return it to the same size.
 		 */
+
+		/* Give back the space saved for the refcount btree. */
+		error = xfs_refcountbt_free_reserve_pool(mp);
+		if (error)
+			xfs_warn(mp,
+	"Error %d freeing refcount btree reserve blocks.", error);
+
 		xfs_save_resvblks(mp);
 		xfs_quiesce_attr(mp);
 		mp->m_flags |= XFS_MOUNT_RDONLY;

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux