[PATCH 74/76] xfs: set up per-AG preallocated block pools

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



One unfortunate quirk of the reference count btree -- it can expand in
size when blocks are written to *other* allocation groups if, say, one
large extent becomes a lot of tiny extents.  Since we don't want to
start throwing errors in the middle of CoWing, establish a pool of
reserved blocks in each AG to feed such an expansion.  Reserved pools
can be large enough to obviate the need for external allocations and
use EFI/EFDs so that the reserved blocks will be freed if the
system crashes.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 fs/xfs/Makefile                |    1 
 fs/xfs/libxfs/xfs_perag_pool.c |  379 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_perag_pool.h |   47 +++++
 fs/xfs/xfs_trace.h             |   15 ++
 4 files changed, 442 insertions(+)
 create mode 100644 fs/xfs/libxfs/xfs_perag_pool.c
 create mode 100644 fs/xfs/libxfs/xfs_perag_pool.h


diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 798e2b0..d2ab008 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -51,6 +51,7 @@ xfs-y				+= $(addprefix libxfs/, \
 				   xfs_inode_fork.o \
 				   xfs_inode_buf.o \
 				   xfs_log_rlimit.o \
+				   xfs_perag_pool.o \
 				   xfs_rmap.o \
 				   xfs_rmap_btree.o \
 				   xfs_refcount.o \
diff --git a/fs/xfs/libxfs/xfs_perag_pool.c b/fs/xfs/libxfs/xfs_perag_pool.c
new file mode 100644
index 0000000..b49ffd2
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_perag_pool.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_perag_pool.h"
+#include "xfs_trans_space.h"
+
+/**
+ * xfs_perag_pool_free() -- Free a per-AG reserved block pool.
+ *
+ * @p: Pool to tear down.  NULL is accepted and treated as a no-op.
+ *
+ * Every extent still on the pool's list is freed in a transaction of its
+ * own and its list entry is released.  If the filesystem has been forcibly
+ * shut down the list entries are released without starting a transaction.
+ * A failure on one extent does not stop the walk.  The pool structure is
+ * freed before returning, whatever the outcome.
+ *
+ * Returns 0, or the first error hit while freeing extents.
+ */
+int
+xfs_perag_pool_free(
+	struct xfs_perag_pool		*p)
+{
+	struct xfs_mount		*mp;
+	struct xfs_perag_pool_entry	*ppe, *n;
+	struct xfs_trans		*tp;
+	xfs_fsblock_t			fsb;
+	struct xfs_bmap_free		freelist;
+	int				committed;
+	int				error = 0, err;
+
+	if (!p)
+		return 0;
+
+	mp = p->pp_mount;
+	list_for_each_entry_safe(ppe, n, &p->pp_entries, ppe_list) {
+		list_del(&ppe->ppe_list);
+		if (XFS_FORCED_SHUTDOWN(mp)) {
+			/* No transactions after a shutdown; just drop the entry. */
+			kmem_free(ppe);
+			continue;
+		}
+
+		/* Set up transaction. */
+		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+		tp->t_flags |= XFS_TRANS_RESERVE;
+		err = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 0, 0);
+		if (err)
+			goto loop_cancel;
+		xfs_bmap_init(&freelist, &fsb);
+		fsb = XFS_AGB_TO_FSB(mp, p->pp_agno, ppe->ppe_bno);
+
+		trace_xfs_perag_pool_free_extent(mp, p->pp_agno, ppe->ppe_bno,
+				ppe->ppe_len, &p->pp_oinfo);
+
+		/* Free the block. */
+		xfs_bmap_add_free(mp, &freelist, fsb, ppe->ppe_len,
+				&p->pp_oinfo);
+
+		err = xfs_bmap_finish(&tp, &freelist, &committed, NULL);
+		if (err)
+			goto loop_cancel;
+
+		/* Keep only the first error, but keep going regardless. */
+		err = xfs_trans_commit(tp);
+		if (!error)
+			error = err;
+		kmem_free(ppe);
+		continue;
+loop_cancel:
+		if (!error)
+			error = err;
+		xfs_trans_cancel(tp);
+		kmem_free(ppe);
+	}
+
+	/*
+	 * Emit the error tracepoint while @p is still valid; it reads
+	 * p->pp_agno, so the pool must not be freed before this point.
+	 */
+	if (error)
+		trace_xfs_perag_pool_free_error(mp, p->pp_agno, error,
+				_RET_IP_);
+	kmem_free(p);
+	return error;
+}
+
+/*
+ * Carve an extent out of the AG and append it to the pool's list.
+ *
+ * On entry *len is the most blocks wanted; on success it is overwritten
+ * with the number of blocks actually obtained.  Returns 0, the allocator's
+ * error, or -ENOSPC if the allocator came back without an extent.
+ */
+static int
+xfs_perag_pool_grab_block(
+	struct xfs_perag_pool		*p,
+	struct xfs_trans		*tp,
+	xfs_extlen_t			*len)
+{
+	struct xfs_mount		*mp = p->pp_mount;
+	struct xfs_perag_pool_entry	*entry;
+	struct xfs_alloc_arg		args;
+	int				rc;
+
+	/* Describe the request: 1..*len blocks near the suggested block. */
+	memset(&args, 0, sizeof(args));
+	args.tp = tp;
+	args.mp = mp;
+	args.oinfo = p->pp_oinfo;
+	args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	args.fsbno = XFS_AGB_TO_FSB(mp, p->pp_agno, p->pp_agbno);
+	args.firstblock = args.fsbno;
+	args.minlen = 1;
+	args.prod = *len;
+	args.maxlen = *len;
+
+	/* Mark the pool as allocating for the duration of the call. */
+	p->pp_allocating = true;
+	rc = xfs_alloc_vextent(&args);
+	p->pp_allocating = false;
+
+	/* oh well, we're headed towards failure. */
+	if (!rc && args.fsbno == NULLFSBLOCK)
+		rc = -ENOSPC;
+	if (rc) {
+		trace_xfs_perag_pool_grab_block_error(mp, p->pp_agno, rc,
+				_RET_IP_);
+		return rc;
+	}
+
+	/* Report back how much we really got. */
+	*len = args.len;
+	trace_xfs_perag_pool_grab_block(mp, p->pp_agno, args.agbno, args.len,
+			&p->pp_oinfo);
+
+	/* Remember the new extent at the tail of the pool's list. */
+	ASSERT(args.agno == p->pp_agno);
+	entry = kmem_alloc(sizeof(*entry), KM_SLEEP);
+	entry->ppe_bno = args.agbno;
+	entry->ppe_len = args.len;
+	list_add_tail(&entry->ppe_list, &p->pp_entries);
+	return 0;
+}
+
+/*
+ * Top the pool up until it holds at least @sz unused blocks.
+ *
+ * With @force set, a shortfall larger than the AG's free block count is
+ * trimmed to that count instead of being refused with -ENOSPC.  Blocks are
+ * obtained one transaction at a time until the shortfall is covered.
+ */
+static int
+__xfs_perag_pool_ensure_capacity(
+	struct xfs_perag_pool		*p,
+	xfs_extlen_t			sz,
+	bool				force)
+{
+	struct xfs_mount		*mp = p->pp_mount;
+	struct xfs_trans		*tp;
+	struct xfs_perag		*pag;
+	uint				agfree;
+	uint				resblks;
+	xfs_extlen_t			got;
+	int				error;
+
+	/* Nothing to do if the unused part of the pool already covers @sz. */
+	if (p->pp_len - p->pp_inuse >= sz)
+		return 0;
+	sz -= p->pp_len - p->pp_inuse;
+
+	trace_xfs_perag_pool_ensure_capacity(mp, p->pp_agno,
+			p->pp_len - p->pp_inuse, sz, &p->pp_oinfo);
+
+	/* Compare the shortfall against the AG's free block count. */
+	pag = xfs_perag_get(mp, p->pp_agno);
+	agfree = pag->pagf_freeblks;
+	xfs_perag_put(pag);
+	if (agfree < sz) {
+		if (!force) {
+			error = -ENOSPC;
+			goto out_error;
+		}
+		sz = agfree;
+	}
+
+	while (sz != 0) {
+		/* Each refill step gets a transaction of its own. */
+		resblks = XFS_DIOSTRAT_SPACE_RES(mp, sz);
+		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
+		if (error)
+			goto out_cancel;
+
+		/* Ask for everything still missing; we may get less. */
+		got = sz;
+		error = xfs_perag_pool_grab_block(p, tp, &got);
+		if (error)
+			goto out_cancel;
+
+		error = xfs_trans_commit(tp);
+		if (error)
+			goto out_error;
+
+		/* Only count the blocks once the commit has succeeded. */
+		sz -= got;
+		p->pp_len += got;
+	}
+	return 0;
+
+out_cancel:
+	xfs_trans_cancel(tp);
+out_error:
+	trace_xfs_perag_pool_ensure_capacity_error(mp, p->pp_agno, error,
+			_RET_IP_);
+	return error;
+}
+
+/**
+ * xfs_perag_pool_ensure_capacity() -- Make sure the pool has spare blocks.
+ *
+ * @p: per-AG reserved blocks pool; NULL is accepted and reports success.
+ * @sz: Number of unused blocks the pool must hold on return.
+ *
+ * Non-forcing front end to __xfs_perag_pool_ensure_capacity().
+ */
+int
+xfs_perag_pool_ensure_capacity(
+	struct xfs_perag_pool		*p,
+	xfs_extlen_t			sz)
+{
+	/* A missing pool has nothing to top up. */
+	return p ? __xfs_perag_pool_ensure_capacity(p, sz, false) : 0;
+}
+
+/**
+ * xfs_perag_pool_init() -- Initialize a per-AG reserved block pool.
+ *
+ * @mp: XFS mount.
+ * @agno: AG the pool belongs to.
+ * @agbno: Suggested AG block number for the pool's allocations.
+ * @len: Desired total pool size, in blocks.
+ * @inuse: Blocks already in use; the pool starts with this many counted.
+ * @owner: Owner value used to build the pool's owner info.
+ * @pp: (out) The new pool.  It is assigned before any blocks are reserved,
+ *      so it is set even when an error is returned.
+ *
+ * Tries to reserve (@len - @inuse) blocks.  The reservation is best effort:
+ * it is trimmed to the AG's free block count and -ENOSPC is not treated as
+ * a failure.
+ *
+ * Returns 0 or a negative errno other than -ENOSPC.
+ */
+int
+xfs_perag_pool_init(
+	struct xfs_mount		*mp,
+	xfs_agnumber_t			agno,
+	xfs_agblock_t			agbno,
+	xfs_extlen_t			len,
+	xfs_extlen_t			inuse,
+	uint64_t			owner,
+	struct xfs_perag_pool		**pp)
+{
+	struct xfs_perag_pool		*p;
+	struct xfs_owner_info		oinfo;
+	xfs_extlen_t			want;
+	int				error;
+
+	XFS_RMAP_AG_OWNER(&oinfo, owner);
+	/* Traced twice: once with the target size, once with the in-use count. */
+	trace_xfs_perag_pool_init(mp, agno, agbno, len, &oinfo);
+	trace_xfs_perag_pool_init(mp, agno, agbno, inuse, &oinfo);
+
+	p = kmem_alloc(sizeof(struct xfs_perag_pool), KM_SLEEP);
+	p->pp_mount = mp;
+	p->pp_agno = agno;
+	p->pp_agbno = agbno;
+	p->pp_inuse = p->pp_len = inuse;
+	p->pp_oinfo = oinfo;
+	p->pp_allocating = false;
+	INIT_LIST_HEAD(&p->pp_entries);
+	*pp = p;
+
+	/*
+	 * Try to reserve some blocks.  The lengths are unsigned, so guard
+	 * the subtraction: if more blocks are already in use than the
+	 * target size, a wrapped value would make the forced reservation
+	 * take every free block in the AG.
+	 */
+	want = len > inuse ? len - inuse : 0;
+	error = __xfs_perag_pool_ensure_capacity(p, want, true);
+	if (error == -ENOSPC)
+		error = 0;
+
+	if (error)
+		trace_xfs_perag_pool_init_error(mp, agno, error, _RET_IP_);
+	return error;
+}
+
+/**
+ * xfs_perag_pool_alloc_block() -- Allocate a block from the pool.
+ *
+ * @p: Reserved block pool.
+ * @tp: Transaction to record the allocation.
+ * @bno: (out) The allocated block number.
+ *
+ * Hands out the first block of the first extent on the pool's list.  If
+ * the list is empty, a single block is first allocated from the AG using
+ * @tp and added to the pool.
+ *
+ * Returns 0 on success, -EINVAL if there is no pool or the pool is itself
+ * in the middle of an allocation (pp_allocating set), or the error from
+ * the attempt to refill an empty pool.
+ */
+int
+xfs_perag_pool_alloc_block(
+	struct xfs_perag_pool		*p,
+	struct xfs_trans		*tp,
+	xfs_agblock_t			*bno)
+{
+	struct xfs_mount		*mp;
+	struct xfs_perag_pool_entry	*ppe;
+	xfs_extlen_t			len;
+	int				error;
+
+	if (p == NULL || p->pp_allocating)
+		return -EINVAL;
+
+	mp = p->pp_mount;
+	mp = mp;	/* NOTE(review): presumably silences an unused warning when tracing is compiled out */
+	/* Empty pool?  Grab another block. */
+	if (list_empty(&p->pp_entries)) {
+		len = 1;
+		error = xfs_perag_pool_grab_block(p, tp, &len);
+		if (error)
+			goto err;
+		ASSERT(len == 1);
+		if (list_empty(&p->pp_entries)) {
+			error = -ENOSPC;
+			goto err;
+		}
+
+		/*
+		 * The new block is part of the pool now.  Without this,
+		 * pp_inuse could exceed pp_len and the unsigned
+		 * (pp_len - pp_inuse) in the capacity check would wrap.
+		 */
+		p->pp_len += len;
+	}
+
+	/* Find an available block. */
+	ppe = list_first_entry(&p->pp_entries, struct xfs_perag_pool_entry,
+			ppe_list);
+	*bno = ppe->ppe_bno;
+
+	trace_xfs_perag_pool_alloc_block(mp, p->pp_agno, *bno, 1, &p->pp_oinfo);
+
+	/* Update the accounting. */
+	ppe->ppe_len--;
+	ppe->ppe_bno++;
+	if (ppe->ppe_len == 0) {
+		/* Extent exhausted: unlink it and release the entry. */
+		list_del(&ppe->ppe_list);
+		kmem_free(ppe);
+	}
+	p->pp_inuse++;
+
+	return 0;
+err:
+	trace_xfs_perag_pool_alloc_block_error(mp, p->pp_agno, error, _RET_IP_);
+	return error;
+}
+
+/**
+ * xfs_perag_pool_free_block() -- Return a block to the pool.
+ *
+ * @p: Reserved block pool.
+ * @tp: Transaction to record the free operation (not referenced here).
+ * @bno: Block to put back.
+ *
+ * The block is merged into the first list extent it directly precedes or
+ * directly follows; if there is none, it becomes a new one-block extent
+ * at the tail of the list.
+ *
+ * Returns -EINVAL if there is no pool, otherwise 0.
+ */
+int
+xfs_perag_pool_free_block(
+	struct xfs_perag_pool		*p,
+	struct xfs_trans		*tp,
+	xfs_agblock_t			bno)
+{
+	struct xfs_mount		*mp;
+	struct xfs_perag_pool_entry	*cur;
+	bool				merged = false;
+
+	if (p == NULL)
+		return -EINVAL;
+
+	mp = p->pp_mount;
+	mp = mp;
+	trace_xfs_perag_pool_free_block(mp, p->pp_agno, bno, 1, &p->pp_oinfo);
+
+	/* Look for an extent that @bno can be glued onto. */
+	list_for_each_entry(cur, &p->pp_entries, ppe_list) {
+		if (cur->ppe_bno - 1 == bno) {
+			/* @bno sits just before the extent: pull the start back. */
+			cur->ppe_bno--;
+			merged = true;
+		} else if (cur->ppe_bno + cur->ppe_len == bno) {
+			/* @bno sits just after the extent. */
+			merged = true;
+		}
+		if (merged) {
+			cur->ppe_len++;
+			break;
+		}
+	}
+
+	/* No neighbour found: track the block as an extent of its own. */
+	if (!merged) {
+		cur = kmem_alloc(sizeof(*cur), KM_SLEEP);
+		cur->ppe_bno = bno;
+		cur->ppe_len = 1;
+		list_add_tail(&cur->ppe_list, &p->pp_entries);
+	}
+
+	/* Either way, one fewer block is in use. */
+	p->pp_inuse--;
+	return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_perag_pool.h b/fs/xfs/libxfs/xfs_perag_pool.h
new file mode 100644
index 0000000..ecdcd2a
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_perag_pool.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+struct xfs_perag_pool_entry {
+	struct list_head	ppe_list;	/* pool list */
+	xfs_agblock_t		ppe_bno;	/* AG block number */
+	xfs_extlen_t		ppe_len;	/* length */
+};
+
+struct xfs_perag_pool {
+	struct xfs_mount	*pp_mount;	/* XFS mount */
+	xfs_agnumber_t		pp_agno;	/* AG number */
+	xfs_agblock_t		pp_agbno;	/* suggested AG block number */
+	xfs_extlen_t		pp_len;		/* blocks in pool */
+	xfs_extlen_t		pp_inuse;	/* blocks in use */
+	struct xfs_owner_info	pp_oinfo;	/* owner */
+	struct list_head	pp_entries;	/* pool entries */
+	bool			pp_allocating;	/* are we allocating? */
+};
+
+int xfs_perag_pool_free(struct xfs_perag_pool *p);
+int xfs_perag_pool_init(struct xfs_mount *mp, xfs_agnumber_t agno,
+		xfs_agblock_t agbno, xfs_extlen_t len, xfs_extlen_t inuse,
+		uint64_t owner, struct xfs_perag_pool **pp);
+
+int xfs_perag_pool_ensure_capacity(struct xfs_perag_pool *p, xfs_extlen_t sz);
+
+int xfs_perag_pool_alloc_block(struct xfs_perag_pool *p, struct xfs_trans *tp,
+		xfs_agblock_t *bno);
+int xfs_perag_pool_free_block(struct xfs_perag_pool *p, struct xfs_trans *tp,
+		xfs_agblock_t bno);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 0773938..dad57dc 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3160,6 +3160,21 @@ DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error);
 
+/*
+ * perag pool tracepoints
+ *
+ * Pool events are declared through DEFINE_RMAP_EVENT and are invoked with
+ * (mount, AG number, AG block number, length, owner info).  The *_error
+ * events are declared through DEFINE_AG_ERROR_EVENT and are invoked with
+ * (mount, AG number, error, caller address).
+ */
+#define DEFINE_PERAG_POOL_EVENT	DEFINE_RMAP_EVENT
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_free_extent);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_grab_block);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_init);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_ensure_capacity);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_alloc_block);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_free_block);
+
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_free_error);
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_grab_block_error);
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_init_error);
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_ensure_capacity_error);
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_alloc_block_error);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux