[PATCH 09/43] xfs: generalize the freespace and reserved blocks handling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The main handling of the incore per-cpu freespace counters is already
handled in xfs_mod_freecounter for both the block and RT extent cases,
but the actual counter is passed in an special cases.

Replace both the percpu counters and the resblks counters with arrays,
so that support reserved RT extents can be supported, which will be
needed for garbarge collection on zoned devices.

Use helpers to access the freespace counters everywhere intead of
poking through the abstraction by using the percpu_count helpers
directly.  This also switches the flooring of the frextents counter
to 0 in statfs for the rthinherit case to a manual min_t call to match
the handling of the fdblocks counter for normal file systems.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
 fs/xfs/libxfs/xfs_ialloc.c       |  2 +-
 fs/xfs/libxfs/xfs_metafile.c     |  2 +-
 fs/xfs/libxfs/xfs_sb.c           |  7 +--
 fs/xfs/scrub/fscounters.c        | 13 +++---
 fs/xfs/scrub/fscounters_repair.c |  4 +-
 fs/xfs/scrub/newbt.c             |  2 +-
 fs/xfs/xfs_fsops.c               | 27 ++++++------
 fs/xfs/xfs_fsops.h               |  3 +-
 fs/xfs/xfs_icache.c              |  4 +-
 fs/xfs/xfs_ioctl.c               | 12 +++---
 fs/xfs/xfs_iomap.c               |  9 ++--
 fs/xfs/xfs_mount.c               | 58 ++++++++++++++++---------
 fs/xfs/xfs_mount.h               | 65 +++++++++++++++++++---------
 fs/xfs/xfs_rtalloc.c             |  2 +-
 fs/xfs/xfs_super.c               | 74 ++++++++++++++++++--------------
 fs/xfs/xfs_trace.h               |  2 +-
 16 files changed, 171 insertions(+), 115 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index f3a840a425f5..57513ba19d6a 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1927,7 +1927,7 @@ xfs_dialloc(
 	 * that we can immediately allocate, but then we allow allocation on the
 	 * second pass if we fail to find an AG with free inodes in it.
 	 */
-	if (percpu_counter_read_positive(&mp->m_fdblocks) <
+	if (xfs_estimate_freecounter(mp, XC_FREE_BLOCKS) <
 			mp->m_low_space[XFS_LOWSP_1_PCNT]) {
 		ok_alloc = false;
 		low_space = true;
diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c
index e151663cc9ef..c84820f5bdc6 100644
--- a/fs/xfs/libxfs/xfs_metafile.c
+++ b/fs/xfs/libxfs/xfs_metafile.c
@@ -77,7 +77,7 @@ xfs_metafile_resv_can_cover(
 	 * There aren't enough blocks left in the inode's reservation, but it
 	 * isn't critical unless there also isn't enough free space.
 	 */
-	return __percpu_counter_compare(&ip->i_mount->m_fdblocks,
+	return xfs_compare_freecounter(ip->i_mount, XC_FREE_BLOCKS,
 			rhs - ip->i_delayed_blks, 2048) >= 0;
 }
 
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 3dc5f5dba162..090f133f4da3 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -1266,7 +1266,7 @@ xfs_log_sb(
 				percpu_counter_sum_positive(&mp->m_ifree),
 				mp->m_sb.sb_icount);
 		mp->m_sb.sb_fdblocks =
-				percpu_counter_sum_positive(&mp->m_fdblocks);
+			max(0LL, xfs_sum_freecounter(mp, XC_FREE_BLOCKS));
 	}
 
 	/*
@@ -1275,9 +1275,10 @@ xfs_log_sb(
 	 * we handle nearly-lockless reservations, so we must use the _positive
 	 * variant here to avoid writing out nonsense frextents.
 	 */
-	if (xfs_has_rtgroups(mp))
+	if (xfs_has_rtgroups(mp)) {
 		mp->m_sb.sb_frextents =
-				percpu_counter_sum_positive(&mp->m_frextents);
+			xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS);
+	}
 
 	xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index ca23cf4db6c5..732658a62a2d 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -350,7 +350,7 @@ xchk_fscount_aggregate_agcounts(
 	 * The global incore space reservation is taken from the incore
 	 * counters, so leave that out of the computation.
 	 */
-	fsc->fdblocks -= mp->m_resblks_avail;
+	fsc->fdblocks -= mp->m_resblks[XC_FREE_BLOCKS].avail;
 
 	/*
 	 * Delayed allocation reservations are taken out of the incore counters
@@ -513,8 +513,8 @@ xchk_fscounters(
 	/* Snapshot the percpu counters. */
 	icount = percpu_counter_sum(&mp->m_icount);
 	ifree = percpu_counter_sum(&mp->m_ifree);
-	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
-	frextents = percpu_counter_sum(&mp->m_frextents);
+	fdblocks = xfs_sum_freecounter(mp, XC_FREE_BLOCKS);
+	frextents = xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS);
 
 	/* No negative values, please! */
 	if (icount < 0 || ifree < 0)
@@ -589,15 +589,16 @@ xchk_fscounters(
 			try_again = true;
 	}
 
-	if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
-			fsc->fdblocks)) {
+	if (!xchk_fscount_within_range(sc, fdblocks,
+			&mp->m_free[XC_FREE_BLOCKS], fsc->fdblocks)) {
 		if (fsc->frozen)
 			xchk_set_corrupt(sc);
 		else
 			try_again = true;
 	}
 
-	if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
+	if (!xchk_fscount_within_range(sc, frextents,
+			&mp->m_free[XC_FREE_RTEXTENTS],
 			fsc->frextents - fsc->frextents_delayed)) {
 		if (fsc->frozen)
 			xchk_set_corrupt(sc);
diff --git a/fs/xfs/scrub/fscounters_repair.c b/fs/xfs/scrub/fscounters_repair.c
index cda13447a373..8fb0db78489e 100644
--- a/fs/xfs/scrub/fscounters_repair.c
+++ b/fs/xfs/scrub/fscounters_repair.c
@@ -64,7 +64,7 @@ xrep_fscounters(
 
 	percpu_counter_set(&mp->m_icount, fsc->icount);
 	percpu_counter_set(&mp->m_ifree, fsc->ifree);
-	percpu_counter_set(&mp->m_fdblocks, fsc->fdblocks);
+	xfs_set_freecounter(mp, XC_FREE_BLOCKS, fsc->fdblocks);
 
 	/*
 	 * Online repair is only supported on v5 file systems, which require
@@ -74,7 +74,7 @@ xrep_fscounters(
 	 * track of the delalloc reservations separately, as they are are
 	 * subtracted from m_frextents, but not included in sb_frextents.
 	 */
-	percpu_counter_set(&mp->m_frextents,
+	xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
 		fsc->frextents - fsc->frextents_delayed);
 	if (!xfs_has_rtgroups(mp))
 		mp->m_sb.sb_frextents = fsc->frextents;
diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index ac38f5843090..3e46b04f427f 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -62,7 +62,7 @@ xrep_newbt_estimate_slack(
 		free = sc->sa.pag->pagf_freeblks;
 		sz = xfs_ag_block_count(sc->mp, pag_agno(sc->sa.pag));
 	} else {
-		free = percpu_counter_sum(&sc->mp->m_fdblocks);
+		free = xfs_sum_freecounter(sc->mp, XC_FREE_BLOCKS);
 		sz = sc->mp->m_sb.sb_dblocks;
 	}
 
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 455298503d01..bb2e31e338b8 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -366,6 +366,7 @@ xfs_growfs_log(
 int
 xfs_reserve_blocks(
 	struct xfs_mount	*mp,
+	enum xfs_free_counter	ctr,
 	uint64_t		request)
 {
 	int64_t			lcounter, delta;
@@ -373,6 +374,8 @@ xfs_reserve_blocks(
 	int64_t			free;
 	int			error = 0;
 
+	ASSERT(ctr < XC_FREE_NR);
+
 	/*
 	 * With per-cpu counters, this becomes an interesting problem. we need
 	 * to work out if we are freeing or allocation blocks first, then we can
@@ -391,16 +394,16 @@ xfs_reserve_blocks(
 	 * counters directly since we shouldn't have any problems unreserving
 	 * space.
 	 */
-	if (mp->m_resblks > request) {
-		lcounter = mp->m_resblks_avail - request;
+	if (mp->m_resblks[ctr].total > request) {
+		lcounter = mp->m_resblks[ctr].avail - request;
 		if (lcounter > 0) {		/* release unused blocks */
 			fdblks_delta = lcounter;
-			mp->m_resblks_avail -= lcounter;
+			mp->m_resblks[ctr].avail -= lcounter;
 		}
-		mp->m_resblks = request;
+		mp->m_resblks[ctr].total = request;
 		if (fdblks_delta) {
 			spin_unlock(&mp->m_sb_lock);
-			xfs_add_fdblocks(mp, fdblks_delta);
+			xfs_add_freecounter(mp, ctr, fdblks_delta);
 			spin_lock(&mp->m_sb_lock);
 		}
 
@@ -409,7 +412,7 @@ xfs_reserve_blocks(
 
 	/*
 	 * If the request is larger than the current reservation, reserve the
-	 * blocks before we update the reserve counters. Sample m_fdblocks and
+	 * blocks before we update the reserve counters. Sample m_free and
 	 * perform a partial reservation if the request exceeds free space.
 	 *
 	 * The code below estimates how many blocks it can request from
@@ -419,10 +422,10 @@ xfs_reserve_blocks(
 	 * space to fill it because mod_fdblocks will refill an undersized
 	 * reserve when it can.
 	 */
-	free = percpu_counter_sum(&mp->m_fdblocks) -
-						xfs_fdblocks_unavailable(mp);
-	delta = request - mp->m_resblks;
-	mp->m_resblks = request;
+	free = xfs_sum_freecounter(mp, ctr) -
+		xfs_freecounter_unavailable(mp, ctr);
+	delta = request - mp->m_resblks[ctr].total;
+	mp->m_resblks[ctr].total = request;
 	if (delta > 0 && free > 0) {
 		/*
 		 * We'll either succeed in getting space from the free block
@@ -436,9 +439,9 @@ xfs_reserve_blocks(
 		 */
 		fdblks_delta = min(free, delta);
 		spin_unlock(&mp->m_sb_lock);
-		error = xfs_dec_fdblocks(mp, fdblks_delta, 0);
+		error = xfs_dec_freecounter(mp, ctr, fdblks_delta, 0);
 		if (!error)
-			xfs_add_fdblocks(mp, fdblks_delta);
+			xfs_add_freecounter(mp, ctr, fdblks_delta);
 		spin_lock(&mp->m_sb_lock);
 	}
 out:
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 3e2f73bcf831..9d23c361ef56 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -8,7 +8,8 @@
 
 int xfs_growfs_data(struct xfs_mount *mp, struct xfs_growfs_data *in);
 int xfs_growfs_log(struct xfs_mount *mp, struct xfs_growfs_log *in);
-int xfs_reserve_blocks(struct xfs_mount *mp, uint64_t request);
+int xfs_reserve_blocks(struct xfs_mount *mp, enum xfs_free_counter cnt,
+		uint64_t request);
 int xfs_fs_goingdown(struct xfs_mount *mp, uint32_t inflags);
 
 int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 7b6c026d01a1..c9ded501e89b 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -2076,7 +2076,7 @@ xfs_inodegc_want_queue_rt_file(
 	if (!XFS_IS_REALTIME_INODE(ip))
 		return false;
 
-	if (__percpu_counter_compare(&mp->m_frextents,
+	if (xfs_compare_freecounter(mp, XC_FREE_RTEXTENTS,
 				mp->m_low_rtexts[XFS_LOWSP_5_PCNT],
 				XFS_FDBLOCKS_BATCH) < 0)
 		return true;
@@ -2104,7 +2104,7 @@ xfs_inodegc_want_queue_work(
 	if (items > mp->m_ino_geo.inodes_per_cluster)
 		return true;
 
-	if (__percpu_counter_compare(&mp->m_fdblocks,
+	if (xfs_compare_freecounter(mp, XC_FREE_BLOCKS,
 				mp->m_low_space[XFS_LOWSP_5_PCNT],
 				XFS_FDBLOCKS_BATCH) < 0)
 		return true;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index de8ba5345e17..d3cf62d81f0d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1131,15 +1131,15 @@ xfs_ioctl_getset_resblocks(
 		error = mnt_want_write_file(filp);
 		if (error)
 			return error;
-		error = xfs_reserve_blocks(mp, fsop.resblks);
+		error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, fsop.resblks);
 		mnt_drop_write_file(filp);
 		if (error)
 			return error;
 	}
 
 	spin_lock(&mp->m_sb_lock);
-	fsop.resblks = mp->m_resblks;
-	fsop.resblks_avail = mp->m_resblks_avail;
+	fsop.resblks = mp->m_resblks[XC_FREE_BLOCKS].total;
+	fsop.resblks_avail = mp->m_resblks[XC_FREE_BLOCKS].avail;
 	spin_unlock(&mp->m_sb_lock);
 
 	if (copy_to_user(arg, &fsop, sizeof(fsop)))
@@ -1155,9 +1155,9 @@ xfs_ioctl_fs_counts(
 	struct xfs_fsop_counts	out = {
 		.allocino = percpu_counter_read_positive(&mp->m_icount),
 		.freeino  = percpu_counter_read_positive(&mp->m_ifree),
-		.freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
-				xfs_fdblocks_unavailable(mp),
-		.freertx  = percpu_counter_read_positive(&mp->m_frextents),
+		.freedata = xfs_estimate_freecounter(mp, XC_FREE_BLOCKS) -
+				xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS),
+		.freertx  = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS),
 	};
 
 	if (copy_to_user(uarg, &out, sizeof(out)))
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index b3783d7b8ebe..f3f4b5c328c3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -432,13 +432,14 @@ xfs_quota_calc_throttle(
 
 static int64_t
 xfs_iomap_freesp(
-	struct percpu_counter	*counter,
+	struct xfs_mount	*mp,
+	unsigned int		idx,
 	uint64_t		low_space[XFS_LOWSP_MAX],
 	int			*shift)
 {
 	int64_t			freesp;
 
-	freesp = percpu_counter_read_positive(counter);
+	freesp = xfs_estimate_freecounter(mp, idx);
 	if (freesp < low_space[XFS_LOWSP_5_PCNT]) {
 		*shift = 2;
 		if (freesp < low_space[XFS_LOWSP_4_PCNT])
@@ -537,10 +538,10 @@ xfs_iomap_prealloc_size(
 
 	if (unlikely(XFS_IS_REALTIME_INODE(ip)))
 		freesp = xfs_rtbxlen_to_blen(mp,
-				xfs_iomap_freesp(&mp->m_frextents,
+				xfs_iomap_freesp(mp, XC_FREE_RTEXTENTS,
 					mp->m_low_rtexts, &shift));
 	else
-		freesp = xfs_iomap_freesp(&mp->m_fdblocks, mp->m_low_space,
+		freesp = xfs_iomap_freesp(mp, XC_FREE_BLOCKS, mp->m_low_space,
 				&shift);
 
 	/*
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 66b91b582691..4174035b2ac9 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1058,7 +1058,8 @@ xfs_mountfs(
 	 * we were already there on the last unmount. Warn if this occurs.
 	 */
 	if (!xfs_is_readonly(mp)) {
-		error = xfs_reserve_blocks(mp, xfs_default_resblks(mp));
+		error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS,
+				xfs_default_resblks(mp));
 		if (error)
 			xfs_warn(mp,
 	"Unable to allocate reserve blocks. Continuing without reserve pool.");
@@ -1178,7 +1179,7 @@ xfs_unmountfs(
 	 * we only every apply deltas to the superblock and hence the incore
 	 * value does not matter....
 	 */
-	error = xfs_reserve_blocks(mp, 0);
+	error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, 0);
 	if (error)
 		xfs_warn(mp, "Unable to free reserved block pool. "
 				"Freespace may not be correct on next mount.");
@@ -1225,52 +1226,68 @@ xfs_fs_writable(
 	return true;
 }
 
+/*
+ * Estimate the amount of free space that is not available to userspace and is
+ * not explicitly reserved from the incore fdblocks.  This includes:
+ *
+ * - The minimum number of blocks needed to support splitting a bmap btree
+ * - The blocks currently in use by the freespace btrees because they record
+ *   the actual blocks that will fill per-AG metadata space reservations
+ */
+uint64_t
+xfs_freecounter_unavailable(
+	struct xfs_mount	*mp,
+	enum xfs_free_counter	ctr)
+{
+	if (ctr == XC_FREE_RTEXTENTS)
+		return 0;
+	return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+}
+
 void
 xfs_add_freecounter(
 	struct xfs_mount	*mp,
-	struct percpu_counter	*counter,
+	enum xfs_free_counter	ctr,
 	uint64_t		delta)
 {
-	bool			has_resv_pool = (counter == &mp->m_fdblocks);
 	uint64_t		res_used;
 
 	/*
 	 * If the reserve pool is depleted, put blocks back into it first.
 	 * Most of the time the pool is full.
 	 */
-	if (!has_resv_pool || mp->m_resblks == mp->m_resblks_avail) {
-		percpu_counter_add(counter, delta);
+	if (likely(mp->m_resblks[ctr].total == mp->m_resblks[ctr].avail)) {
+		percpu_counter_add(&mp->m_free[ctr], delta);
 		return;
 	}
 
 	spin_lock(&mp->m_sb_lock);
-	res_used = mp->m_resblks - mp->m_resblks_avail;
+	res_used = mp->m_resblks[ctr].total - mp->m_resblks[ctr].avail;
 	if (res_used > delta) {
-		mp->m_resblks_avail += delta;
+		mp->m_resblks[ctr].avail += delta;
 	} else {
 		delta -= res_used;
-		mp->m_resblks_avail = mp->m_resblks;
-		percpu_counter_add(counter, delta);
+		mp->m_resblks[ctr].avail = mp->m_resblks[ctr].total;
+		percpu_counter_add(&mp->m_free[ctr], delta);
 	}
 	spin_unlock(&mp->m_sb_lock);
 }
 
+
+/* Adjust in-core free blocks or RT extents. */
 int
 xfs_dec_freecounter(
 	struct xfs_mount	*mp,
-	struct percpu_counter	*counter,
+	enum xfs_free_counter	ctr,
 	uint64_t		delta,
 	bool			rsvd)
 {
+	struct percpu_counter	*counter = &mp->m_free[ctr];
 	int64_t			lcounter;
 	uint64_t		set_aside = 0;
 	s32			batch;
-	bool			has_resv_pool;
 
-	ASSERT(counter == &mp->m_fdblocks || counter == &mp->m_frextents);
-	has_resv_pool = (counter == &mp->m_fdblocks);
-	if (rsvd)
-		ASSERT(has_resv_pool);
+	ASSERT(ctr < XC_FREE_NR);
 
 	/*
 	 * Taking blocks away, need to be more accurate the closer we
@@ -1297,8 +1314,7 @@ xfs_dec_freecounter(
 	 * problems (i.e. transaction abort, pagecache discards, etc.) than
 	 * slightly premature -ENOSPC.
 	 */
-	if (has_resv_pool)
-		set_aside = xfs_fdblocks_unavailable(mp);
+	set_aside = xfs_freecounter_unavailable(mp, ctr);
 	percpu_counter_add_batch(counter, -((int64_t)delta), batch);
 	if (__percpu_counter_compare(counter, set_aside,
 				     XFS_FDBLOCKS_BATCH) >= 0) {
@@ -1312,12 +1328,12 @@ xfs_dec_freecounter(
 	 */
 	spin_lock(&mp->m_sb_lock);
 	percpu_counter_add(counter, delta);
-	if (!has_resv_pool || !rsvd)
+	if (!rsvd)
 		goto fdblocks_enospc;
 
-	lcounter = (long long)mp->m_resblks_avail - delta;
+	lcounter = (long long)mp->m_resblks[ctr].avail - delta;
 	if (lcounter >= 0) {
-		mp->m_resblks_avail = lcounter;
+		mp->m_resblks[ctr].avail = lcounter;
 		spin_unlock(&mp->m_sb_lock);
 		return 0;
 	}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index fbed172d6770..d92bce7bc184 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -105,6 +105,12 @@ struct xfs_groups {
 	uint64_t		blkmask;
 };
 
+enum xfs_free_counter {
+	XC_FREE_BLOCKS,		/* free block counter */
+	XC_FREE_RTEXTENTS,	/* free rt extent counter */
+	XC_FREE_NR,
+};
+
 /*
  * The struct xfsmount layout is optimised to separate read-mostly variables
  * from variables that are frequently modified. We put the read-mostly variables
@@ -222,8 +228,7 @@ typedef struct xfs_mount {
 	spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */
 	struct percpu_counter	m_icount;	/* allocated inodes counter */
 	struct percpu_counter	m_ifree;	/* free inodes counter */
-	struct percpu_counter	m_fdblocks;	/* free block counter */
-	struct percpu_counter	m_frextents;	/* free rt extent counter */
+	struct percpu_counter	m_free[XC_FREE_NR];
 
 	/*
 	 * Count of data device blocks reserved for delayed allocations,
@@ -245,9 +250,11 @@ typedef struct xfs_mount {
 	atomic64_t		m_allocbt_blks;
 
 	struct xfs_groups	m_groups[XG_TYPE_MAX];
-	uint64_t		m_resblks;	/* total reserved blocks */
-	uint64_t		m_resblks_avail;/* available reserved blocks */
-	uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
+	struct {
+		uint64_t	total;		/* total reserved blocks */
+		uint64_t	avail;		/* available reserved blocks */
+		uint64_t	save;		/* reserved blks @ remount,ro */
+	} m_resblks[XC_FREE_NR];
 	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
 	struct dentry		*m_debugfs;	/* debugfs parent */
 	struct xfs_kobj		m_kobj;
@@ -646,45 +653,61 @@ extern void	xfs_unmountfs(xfs_mount_t *);
  */
 #define XFS_FDBLOCKS_BATCH	1024
 
+uint64_t xfs_freecounter_unavailable(struct xfs_mount *mp,
+		enum xfs_free_counter ctr);
+
+static inline s64 xfs_sum_freecounter(struct xfs_mount *mp,
+		enum xfs_free_counter ctr)
+{
+	return percpu_counter_sum(&mp->m_free[ctr]);
+}
+
 /*
- * Estimate the amount of free space that is not available to userspace and is
- * not explicitly reserved from the incore fdblocks.  This includes:
- *
- * - The minimum number of blocks needed to support splitting a bmap btree
- * - The blocks currently in use by the freespace btrees because they record
- *   the actual blocks that will fill per-AG metadata space reservations
+ * This just provides and estimate without the cpu-local updates, use
+ * xfs_sum_freecounter for the exact value.
  */
-static inline uint64_t
-xfs_fdblocks_unavailable(
-	struct xfs_mount	*mp)
+static inline s64 xfs_estimate_freecounter(struct xfs_mount *mp,
+		enum xfs_free_counter ctr)
+{
+	return percpu_counter_read_positive(&mp->m_free[ctr]);
+}
+
+static inline int xfs_compare_freecounter(struct xfs_mount *mp,
+		enum xfs_free_counter ctr, s64 rhs, s32 batch)
+{
+	return __percpu_counter_compare(&mp->m_free[ctr], rhs, batch);
+}
+
+static inline void xfs_set_freecounter(struct xfs_mount *mp,
+		enum xfs_free_counter ctr, uint64_t val)
 {
-	return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+	percpu_counter_set(&mp->m_free[ctr], val);
 }
 
-int xfs_dec_freecounter(struct xfs_mount *mp, struct percpu_counter *counter,
+int xfs_dec_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr,
 		uint64_t delta, bool rsvd);
-void xfs_add_freecounter(struct xfs_mount *mp, struct percpu_counter *counter,
+void xfs_add_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr,
 		uint64_t delta);
 
 static inline int xfs_dec_fdblocks(struct xfs_mount *mp, uint64_t delta,
 		bool reserved)
 {
-	return xfs_dec_freecounter(mp, &mp->m_fdblocks, delta, reserved);
+	return xfs_dec_freecounter(mp, XC_FREE_BLOCKS, delta, reserved);
 }
 
 static inline void xfs_add_fdblocks(struct xfs_mount *mp, uint64_t delta)
 {
-	xfs_add_freecounter(mp, &mp->m_fdblocks, delta);
+	xfs_add_freecounter(mp, XC_FREE_BLOCKS, delta);
 }
 
 static inline int xfs_dec_frextents(struct xfs_mount *mp, uint64_t delta)
 {
-	return xfs_dec_freecounter(mp, &mp->m_frextents, delta, false);
+	return xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, delta, false);
 }
 
 static inline void xfs_add_frextents(struct xfs_mount *mp, uint64_t delta)
 {
-	xfs_add_freecounter(mp, &mp->m_frextents, delta);
+	xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, delta);
 }
 
 extern int	xfs_readsb(xfs_mount_t *, int);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index bc18b694db75..8da2498417f5 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1519,7 +1519,7 @@ xfs_rtalloc_reinit_frextents(
 	spin_lock(&mp->m_sb_lock);
 	mp->m_sb.sb_frextents = val;
 	spin_unlock(&mp->m_sb_lock);
-	percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
+	xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, mp->m_sb.sb_frextents);
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index a74a0cc1f6f6..1960ee0aad45 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -834,10 +834,11 @@ xfs_statfs_data(
 	struct kstatfs		*st)
 {
 	int64_t			fdblocks =
-		percpu_counter_sum(&mp->m_fdblocks);
+		xfs_sum_freecounter(mp, XC_FREE_BLOCKS);
 
 	/* make sure st->f_bfree does not underflow */
-	st->f_bfree = max(0LL, fdblocks - xfs_fdblocks_unavailable(mp));
+	st->f_bfree = max(0LL,
+		fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS));
 	st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp);
 }
 
@@ -852,9 +853,9 @@ xfs_statfs_rt(
 	struct kstatfs		*st)
 {
 	int64_t			freertx =
-		percpu_counter_sum_positive(&mp->m_frextents);
+		xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS);
 
-	st->f_bfree = xfs_rtbxlen_to_blen(mp, freertx);
+	st->f_bfree = xfs_rtbxlen_to_blen(mp, max(0LL, freertx));
 	st->f_blocks = mp->m_sb.sb_rblocks;
 }
 
@@ -920,24 +921,32 @@ xfs_fs_statfs(
 }
 
 STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
+xfs_save_resvblks(
+	struct xfs_mount	*mp)
 {
-	mp->m_resblks_save = mp->m_resblks;
-	xfs_reserve_blocks(mp, 0);
+	enum xfs_free_counter	i;
+
+	for (i = 0; i < XC_FREE_NR; i++) {
+		mp->m_resblks[i].save = mp->m_resblks[i].total;
+		xfs_reserve_blocks(mp, i, 0);
+	}
 }
 
 STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
+xfs_restore_resvblks(
+	struct xfs_mount	*mp)
 {
-	uint64_t resblks;
+	uint64_t		resblks;
+	enum xfs_free_counter	i;
 
-	if (mp->m_resblks_save) {
-		resblks = mp->m_resblks_save;
-		mp->m_resblks_save = 0;
-	} else
-		resblks = xfs_default_resblks(mp);
-
-	xfs_reserve_blocks(mp, resblks);
+	for (i = 0; i < XC_FREE_NR; i++) {
+		if (mp->m_resblks[i].save) {
+			resblks = mp->m_resblks[i].save;
+			mp->m_resblks[i].save = 0;
+		} else
+			resblks = xfs_default_resblks(mp);
+		xfs_reserve_blocks(mp, i, resblks);
+	}
 }
 
 /*
@@ -1063,7 +1072,8 @@ static int
 xfs_init_percpu_counters(
 	struct xfs_mount	*mp)
 {
-	int		error;
+	int			error;
+	enum xfs_free_counter	i;
 
 	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
 	if (error)
@@ -1073,30 +1083,28 @@ xfs_init_percpu_counters(
 	if (error)
 		goto free_icount;
 
-	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
-	if (error)
-		goto free_ifree;
-
 	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
 	if (error)
-		goto free_fdblocks;
+		goto free_ifree;
 
 	error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL);
 	if (error)
 		goto free_delalloc;
 
-	error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
-	if (error)
-		goto free_delalloc_rt;
+	for (i = 0; i < XC_FREE_NR; i++) {
+		error = percpu_counter_init(&mp->m_free[i], 0, GFP_KERNEL);
+		if (error)
+			goto free_freecounters;
+	}
 
 	return 0;
 
-free_delalloc_rt:
+free_freecounters:
+	while (--i > 0)
+		percpu_counter_destroy(&mp->m_free[i]);
 	percpu_counter_destroy(&mp->m_delalloc_rtextents);
 free_delalloc:
 	percpu_counter_destroy(&mp->m_delalloc_blks);
-free_fdblocks:
-	percpu_counter_destroy(&mp->m_fdblocks);
 free_ifree:
 	percpu_counter_destroy(&mp->m_ifree);
 free_icount:
@@ -1110,24 +1118,26 @@ xfs_reinit_percpu_counters(
 {
 	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
 	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
-	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
-	percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
+	xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks);
+	xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, mp->m_sb.sb_frextents);
 }
 
 static void
 xfs_destroy_percpu_counters(
 	struct xfs_mount	*mp)
 {
+	enum xfs_free_counter	i;
+
+	for (i = 0; i < XC_FREE_NR; i++)
+		percpu_counter_destroy(&mp->m_free[i]);
 	percpu_counter_destroy(&mp->m_icount);
 	percpu_counter_destroy(&mp->m_ifree);
-	percpu_counter_destroy(&mp->m_fdblocks);
 	ASSERT(xfs_is_shutdown(mp) ||
 	       percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
 	percpu_counter_destroy(&mp->m_delalloc_rtextents);
 	ASSERT(xfs_is_shutdown(mp) ||
 	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
 	percpu_counter_destroy(&mp->m_delalloc_blks);
-	percpu_counter_destroy(&mp->m_frextents);
 }
 
 static int
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 4fe689410eb6..15dec76fec10 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -5621,7 +5621,7 @@ DECLARE_EVENT_CLASS(xfs_metafile_resv_class,
 
 		__entry->dev = mp->m_super->s_dev;
 		__entry->ino = ip->i_ino;
-		__entry->freeblks = percpu_counter_sum(&mp->m_fdblocks);
+		__entry->freeblks = xfs_sum_freecounter(mp, XC_FREE_BLOCKS);
 		__entry->reserved = ip->i_delayed_blks;
 		__entry->asked = ip->i_meta_resv_asked;
 		__entry->used = ip->i_nblocks;
-- 
2.45.2





[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux