[PATCH RFC 5/5] xfs: new cntbt based near allocation algorithm

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Prototype a new NEAR type allocation algorithm. Use both the
by-block and by-size btrees to allocate the largest available extent
with ideal locality.

Signed-off-by: Brian Foster <bfoster@xxxxxxxxxx>
---
 fs/xfs/libxfs/xfs_alloc.c | 405 +++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_trace.h        |   1 +
 2 files changed, 405 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 95ee48e3fb9d..96aef9f9eaca 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1315,6 +1315,392 @@ xfs_alloc_ag_vextent_near_bnobt(
 
 }
 
+enum xfs_alloc_cur {
+	XFS_ALLOC_CNT = 0,
+	XFS_ALLOC_BNOLT,
+	XFS_ALLOC_BNOGT,
+	XFS_ALLOC_MAX
+};
+
+struct xfs_alloc_best {
+	struct xfs_btree_cur	*cur[XFS_ALLOC_MAX];
+	xfs_extlen_t		cur_len;	/* current search length */
+	xfs_agblock_t		rec_bno;	/* record startblock */
+	xfs_extlen_t		rec_len;	/* record length */
+	xfs_agblock_t		bno;		/* alloc bno */
+	xfs_extlen_t		len;		/* alloc len */
+	xfs_extlen_t		diff;		/* diff from search bno */
+	bool			done;
+	bool			busy;
+	unsigned		busy_gen;
+};
+
+static int
+xfs_alloc_best_setup(
+	struct xfs_alloc_arg	*args,
+	struct xfs_alloc_best	*best)
+{
+	int			error;
+	int			i;
+
+	best->diff = -1;
+
+	best->cur[XFS_ALLOC_CNT] = xfs_allocbt_init_cursor(args->mp, args->tp,
+					args->agbp, args->agno, XFS_BTNUM_CNT);
+
+	best->cur[XFS_ALLOC_BNOLT] = xfs_allocbt_init_cursor(args->mp, args->tp,
+					args->agbp, args->agno, XFS_BTNUM_BNO);
+	error = xfs_alloc_lookup_le(best->cur[XFS_ALLOC_BNOLT], args->agbno,
+				    args->maxlen, &i);
+	if (error)
+		return error;
+	if (!i) {
+		best->cur[XFS_ALLOC_BNOGT] = best->cur[XFS_ALLOC_BNOLT];
+		best->cur[XFS_ALLOC_BNOLT] = NULL;
+	} else {
+		xfs_btree_dup_cursor(best->cur[XFS_ALLOC_BNOLT],
+				     &best->cur[XFS_ALLOC_BNOGT]);
+	}
+
+	error = xfs_btree_increment(best->cur[XFS_ALLOC_BNOGT], 0, &i);
+	if (error)
+		return error;
+	if (!i) {
+		xfs_btree_del_cursor(best->cur[XFS_ALLOC_BNOGT],
+				     XFS_BTREE_NOERROR);
+		best->cur[XFS_ALLOC_BNOGT] = NULL;
+	}
+
+	return error;
+}
+
+static inline void
+__xfs_alloc_best_close(
+	struct xfs_alloc_best	*best,
+	enum xfs_alloc_cur	cidx,
+	int			error)
+{
+	if (!best->cur[cidx])
+		return;
+
+	xfs_btree_del_cursor(best->cur[cidx], error);
+	best->cur[cidx] = NULL;
+}
+
+static void
+xfs_alloc_best_close(
+	struct xfs_alloc_best	*best,
+	bool			error)
+{
+	int			cur_error = XFS_BTREE_NOERROR;
+	int			i;
+
+	if (error)
+		cur_error = XFS_BTREE_ERROR;
+	for (i = 0; i < XFS_ALLOC_MAX; i++)
+		__xfs_alloc_best_close(best, i, cur_error);
+}
+
+static bool
+xfs_alloc_best_check(
+	struct xfs_alloc_arg	*args,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	struct xfs_alloc_best	*best,
+	enum xfs_alloc_cur	cidx)
+{
+	xfs_agblock_t		bnoa, bnew;
+	xfs_extlen_t		lena, diff = -1;
+	bool			busy;
+	unsigned		busy_gen = 0;
+	bool			badrange = false;
+	bool			isbnobt = false;
+
+	isbnobt = (cidx == XFS_ALLOC_BNOLT || cidx == XFS_ALLOC_BNOGT);
+
+	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
+					 &busy_gen);
+	best->busy |= busy;
+	if (busy)
+		best->busy_gen = busy_gen;
+
+	if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
+		badrange = true;
+		goto fail;
+	}
+	if (lena < args->minlen)
+		goto fail;
+
+	args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
+	xfs_alloc_fix_len(args);
+	ASSERT(args->len >= args->minlen);
+	if (args->len < best->len)
+		goto fail;
+
+	diff = xfs_alloc_compute_diff(args->agbno, args->len,
+				      args->alignment, args->datatype,
+				      bnoa, lena, &bnew);
+	if (bnew == NULLAGBLOCK)
+		goto fail;
+	if (diff > best->diff) {
+		badrange = true;
+		goto fail;
+	}
+	if (args->len < best->len)
+		goto fail;
+
+	best->rec_bno = bno;
+	best->rec_len = len;
+	best->bno = bnew;
+	best->len = args->len;
+	best->diff = diff;
+	return true;
+
+fail:
+	/* close bnobt cursors once out of range to prevent further searching */
+	if (isbnobt && badrange)
+		__xfs_alloc_best_close(best, cidx, XFS_BTREE_NOERROR);
+	return false;
+}
+
+/*
+ * Complete an allocation if we have a candidate extent.
+ */
+STATIC int
+xfs_alloc_best_finish(
+	struct xfs_alloc_arg	*args,
+	struct xfs_alloc_best	*best,
+	int			*stat)
+{
+	struct xfs_btree_cur	*bno_cur;
+	int			error;
+
+	ASSERT(best->len);
+	ASSERT(best->cur[XFS_ALLOC_CNT]);
+
+	/*
+	 * Reuse or reopen a bnobt cursor if necessary. It will be closed as
+	 * part of the xfs_alloc_best cleanup.
+	 */
+	if (best->cur[XFS_ALLOC_BNOLT]) {
+		bno_cur = best->cur[XFS_ALLOC_BNOLT];
+	} else if (best->cur[XFS_ALLOC_BNOGT]) {
+		bno_cur = best->cur[XFS_ALLOC_BNOGT];
+	} else {
+		bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp,
+						  args->agbp, args->agno,
+						  XFS_BTNUM_BNO);
+		best->cur[XFS_ALLOC_BNOLT] = bno_cur;
+	}
+
+	error = xfs_alloc_fixup_trees(best->cur[XFS_ALLOC_CNT], bno_cur,
+				      best->rec_bno, best->rec_len, best->bno,
+				      best->len, 0);
+	if (error)
+		return error;
+
+	args->agbno = best->bno;
+	args->len = best->len;
+	*stat = 1;
+
+	return 0;
+}
+
+STATIC int
+xfs_alloc_size_iter(
+	struct xfs_alloc_arg	*args,
+	struct xfs_alloc_best	*best,
+	enum xfs_alloc_cur	cidx)
+{
+	struct xfs_btree_cur	*cur = best->cur[cidx];
+	xfs_agblock_t		bno;
+	xfs_extlen_t		len, cur_len;
+	int			error;
+	int			i;
+
+	/*
+	 * Store the current search key and perform a locality optimized lookup.
+	 */
+	cur_len = best->cur_len;
+	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
+	if (error)
+		return error;
+	if (i == 0) {
+		best->done = true;
+		return 0;
+	}
+	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
+	if (error)
+		return error;
+
+	/*
+	 * Check the record and update the search key to the extent length we
+	 * actually found in the tree.
+	 */
+	XFS_WANT_CORRUPTED_RETURN(args->mp, i == 1);
+	xfs_alloc_best_check(args, bno, len, best, cidx);
+	ASSERT(len >= best->cur_len);
+	best->cur_len = len;
+
+	/*
+	 * We looked up the first record >= [agbno, len] above. If this is a
+	 * cntbt search, the agbno is a secondary key and so the record may not
+	 * start beyond agbno if there are no such records for the particular
+	 * length. If it is past agbno, check the previous record too so long as
+	 * the length matches as it may be closer.
+	 */
+	if (bno > args->agbno) {
+		error = xfs_btree_decrement(cur, 0, &i);
+		if (error)
+			return error;
+		if (i) {
+			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
+			if (error)
+				return error;
+			XFS_WANT_CORRUPTED_RETURN(args->mp, i == 1);
+			if (len == best->cur_len)
+				xfs_alloc_best_check(args, bno, len, best,
+						     cidx);
+		}
+	}
+
+	/*
+	 * Increment the search key if we haven't yet found a candidate extent
+	 * or this lookup already significantly bumped the key. We have to make
+	 * sure to not skip any records until we have at least one allocatable
+	 * extent. Note that if the allocation ultimately fails due to busy
+	 * extents, we'll flush the busy list and restart the whole thing.
+	 *
+	 * Otherwise, double the search key for the next lookup to optimize the
+	 * search. This allows us to find good enough locality at the expense of
+	 * absolute best locality. Max extent size and reasonable allocation
+	 * efficiency are more important here than perfect locality.
+	 */
+	cur_len <<= 1;
+	if (!best->len || best->cur_len >= cur_len)
+		best->cur_len++;
+	else
+		best->cur_len = cur_len;
+
+	return error;
+}
+
+STATIC int
+xfs_alloc_bno_iter(
+	struct xfs_alloc_arg	*args,
+	struct xfs_alloc_best	*best,
+	enum xfs_alloc_cur	cidx,
+	int			iters,
+	int			*stat)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
+	int			i;
+	xfs_agblock_t		bno;
+	xfs_extlen_t		len;
+	bool			found = false;
+
+	*stat = 0;
+	cur = best->cur[cidx];
+	if (!cur)
+		return 0;
+
+	for (; iters > 0; iters--) {
+		error = xfs_alloc_get_rec(cur, &bno, &len, &i);
+		if (error)
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(args->mp, i == 1);
+		found = xfs_alloc_best_check(args, bno, len, best, cidx);
+		if (found) {
+			*stat = 1;
+			break;
+		}
+		if (!best->cur[cidx])
+			break;
+
+		if (cidx == XFS_ALLOC_BNOLT)
+			error = xfs_btree_decrement(cur, 0, &i);
+		else if (cidx == XFS_ALLOC_BNOGT)
+			error = xfs_btree_increment(cur, 0, &i);
+		if (error)
+			return error;
+		if (i == 0) {
+			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+			best->cur[cidx] = NULL;
+			break;
+		}
+	}
+
+	return error;
+}
+
+STATIC int
+xfs_alloc_ag_vextent_new(
+	struct xfs_alloc_arg	*args,
+	struct xfs_alloc_best	*best,
+	int			*stat)
+{
+	int			error;
+	int			i;
+	unsigned int		findbest = 0;
+	enum xfs_alloc_cur	findbestc;
+
+	*stat = 0;
+
+	error = xfs_alloc_best_setup(args, best);
+	if (error)
+		goto out;
+
+	best->cur_len = args->maxlen;
+	while (!best->done) {
+		/*
+		 * Search the bnobt left and right. If either of these find a
+		 * suitable extent, we know we've found ideal locality. Do a
+		 * capped search in the opposite direction and we're done.
+		 */
+		error = xfs_alloc_bno_iter(args, best, XFS_ALLOC_BNOLT, 1, &i);
+		if (error)
+			goto out;
+		if (i) {
+			best->done = true;
+			findbest = args->mp->m_alloc_mxr[0];
+			findbestc = XFS_ALLOC_BNOGT;
+			break;
+		}
+
+		error = xfs_alloc_bno_iter(args, best, XFS_ALLOC_BNOGT, 1, &i);
+		if (error)
+			goto out;
+		if (i) {
+			best->done = true;
+			findbest = args->mp->m_alloc_mxr[0];
+			findbestc = XFS_ALLOC_BNOLT;
+			break;
+		}
+
+		/*
+		 * Search for an extent based on size. This only sets best.done
+		 * if we hit the end of the tree.
+		 */
+		error = xfs_alloc_size_iter(args, best, XFS_ALLOC_CNT);
+		if (error)
+			goto out;
+	}
+
+	if (findbest) {
+		error = xfs_alloc_bno_iter(args, best, findbestc, findbest, &i);
+		if (error)
+			goto out;
+	}
+
+	if (best->len)
+		error = xfs_alloc_best_finish(args, best, stat);
+
+out:
+	xfs_alloc_best_close(best, error);
+	return error;
+}
+
 /*
  * Allocate a variable extent near bno in the allocation group agno.
  * Extent's length (returned in len) will be between minlen and maxlen,
@@ -1326,7 +1712,7 @@ xfs_alloc_ag_vextent_near(
 	struct xfs_alloc_arg	*args)	/* allocation argument structure */
 {
 	struct xfs_btree_cur	*bno_cur = NULL;/* cursor for bno btree */
-	struct xfs_btree_cur	*cnt_cur;	/* cursor for count btree */
+	struct xfs_btree_cur	*cnt_cur = NULL;/* cursor for count btree */
 	int			error;		/* error code */
 	int			i;		/* result code, temporary */
 	xfs_agblock_t		bno;	      /* start bno of left side entry */
@@ -1357,6 +1743,23 @@ xfs_alloc_ag_vextent_near(
 	len = 0;
 	busy = false;
 
+	if (args->agbno != NULLAGBLOCK) {
+		struct xfs_alloc_best	best = {0,};
+
+		error = xfs_alloc_ag_vextent_new(args, &best, &i);
+		if (error)
+			goto error;
+		if (i) {
+			trace_xfs_alloc_near_new(args);
+			return 0;
+		}
+		if (best.busy) {
+			xfs_extent_busy_flush(args->mp, args->pag,
+					      best.busy_gen);
+			goto restart;
+		}
+	}
+
 	/*
 	 * Get a cursor for the by-size btree and start with a lookup for maxlen
 	 * free extents.
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 8a6532aae779..7a4e5829d594 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1626,6 +1626,7 @@ DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
 DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
 DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_new);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
-- 
2.17.2




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux