[PATCH 2/5] xfs: use generic percpu counters for inode counter

XFS has hand-rolled per-cpu counters for the superblock since before
there was any generic implementation. There are some warts around
their use for the inode counter, as the hand-rolled counter is
designed to be accurate at zero but has no specific accuracy at
any other value. This design causes problems for maximum inode
count threshold enforcement, as there is no trigger that balances
the counters as they approach the maximum threshold.

Instead of designing new triggers for balancing, just replace the
hand-rolled per-cpu counter with a generic percpu counter directly
in the incore superblock. This enables us to update the counter
through the normal superblock modification functions, rather than
having to bypass them and then fall back to the normal modification
functions if the per-cpu nature of the counter has been turned off.

The result is a much simpler counter implementation that can be used
accurately across its entire range of values, rather than just for
values near zero.
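
For reference, the generic percpu counter API this patch switches to
follows the lifecycle sketched below. This is not part of the patch:
it is a minimal, illustrative example with made-up names (demo_counter
and the demo_* helpers), using only calls the patch itself relies on,
namely percpu_counter_init(), __percpu_counter_add() with an explicit
batch, percpu_counter_read_positive() for cheap approximate reads,
percpu_counter_sum() for accurate sums, and percpu_counter_destroy().

/*
 * Illustrative sketch only, not part of this patch. The names
 * demo_counter and demo_* are hypothetical; the percpu_counter
 * calls are the generic API the superblock icount now uses.
 */
#include <linux/percpu_counter.h>
#include <linux/gfp.h>

static struct percpu_counter demo_counter;

static int demo_counter_setup(void)
{
	/* allocates per-cpu storage, so it can fail and must be checked */
	return percpu_counter_init(&demo_counter, 0, GFP_KERNEL);
}

static void demo_counter_modify(s64 delta)
{
	/*
	 * A large batch keeps most updates CPU-local; the global count
	 * is only touched when a CPU's local delta exceeds the batch.
	 * The patch uses a batch of 128 for the +/-64 inode deltas.
	 */
	__percpu_counter_add(&demo_counter, delta, 128);
}

static bool demo_counter_over_limit(s64 limit)
{
	/* cheap, approximate read; good enough for threshold checks */
	return percpu_counter_read_positive(&demo_counter) > limit;
}

static s64 demo_counter_exact(void)
{
	/* accurate but expensive: sums every CPU's local contribution */
	return percpu_counter_sum(&demo_counter);
}

static void demo_counter_teardown(void)
{
	percpu_counter_destroy(&demo_counter);
}

The hunks below use the same split: percpu_counter_read() for the fast
m_maxicount threshold checks, and percpu_counter_sum() where an exact
value is needed, such as when writing the superblock back to disk.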

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/libxfs/xfs_ialloc.c |  6 +++--
 fs/xfs/libxfs/xfs_sb.c     | 10 +++++---
 fs/xfs/xfs_fsops.c         |  3 ++-
 fs/xfs/xfs_mount.c         | 58 +++++++++++++++++-----------------------------
 fs/xfs/xfs_mount.h         |  1 -
 fs/xfs/xfs_super.c         |  8 ++++---
 fs/xfs/xfs_super.h         |  4 +++-
 fs/xfs/xfs_trans.c         |  5 ++--
 8 files changed, 44 insertions(+), 51 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 116ef1d..95a1762 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
 	 */
 	newlen = args.mp->m_ialloc_inos;
 	if (args.mp->m_maxicount &&
-	    args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
+	    percpu_counter_read(&args.mp->m_sb.sb_icount) + newlen >
+							args.mp->m_maxicount)
 		return -ENOSPC;
 	args.minlen = args.maxlen = args.mp->m_ialloc_blks;
 	/*
@@ -1340,7 +1341,8 @@ xfs_dialloc(
 	 * inode.
 	 */
 	if (mp->m_maxicount &&
-	    mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
+	    percpu_counter_read(&mp->m_sb.sb_icount) + mp->m_ialloc_inos >
+							mp->m_maxicount) {
 		noroom = 1;
 		okalloc = 0;
 	}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 4cf335b..7bfa527 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -357,7 +357,8 @@ __xfs_sb_from_disk(
 	to->sb_rextslog = from->sb_rextslog;
 	to->sb_inprogress = from->sb_inprogress;
 	to->sb_imax_pct = from->sb_imax_pct;
-	to->sb_icount = be64_to_cpu(from->sb_icount);
+	if (percpu_counter_initialized(&to->sb_icount))
+		percpu_counter_set(&to->sb_icount, be64_to_cpu(from->sb_icount));
 	to->sb_ifree = be64_to_cpu(from->sb_ifree);
 	to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks);
 	to->sb_frextents = be64_to_cpu(from->sb_frextents);
@@ -492,7 +493,7 @@ xfs_sb_to_disk(
 	to->sb_rextslog = from->sb_rextslog;
 	to->sb_inprogress = from->sb_inprogress;
 	to->sb_imax_pct = from->sb_imax_pct;
-	to->sb_icount = cpu_to_be64(from->sb_icount);
+	to->sb_icount = cpu_to_be64(percpu_counter_sum(&from->sb_icount));
 	to->sb_ifree = cpu_to_be64(from->sb_ifree);
 	to->sb_fdblocks = cpu_to_be64(from->sb_fdblocks);
 	to->sb_frextents = cpu_to_be64(from->sb_frextents);
@@ -537,6 +538,9 @@ xfs_sb_verify(
 	struct xfs_mount *mp = bp->b_target->bt_mount;
 	struct xfs_sb	sb;
 
+	/* don't need to validate icount here */
+	sb.sb_icount.counters = NULL;
+
 	/*
 	 * Use call variant which doesn't convert quota flags from disk 
 	 * format, because xfs_mount_validate_sb checks the on-disk flags.
@@ -748,7 +752,7 @@ xfs_initialize_perag_data(
 	 */
 	spin_lock(&mp->m_sb_lock);
 	sbp->sb_ifree = ifree;
-	sbp->sb_icount = ialloc;
+	percpu_counter_set(&sbp->sb_icount, ialloc);
 	sbp->sb_fdblocks = bfree + bfreelst + btree;
 	spin_unlock(&mp->m_sb_lock);
 
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index f711452..9cc34d2 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -631,11 +631,12 @@ xfs_fs_counts(
 	xfs_fsop_counts_t	*cnt)
 {
 	xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+	cnt->allocino = percpu_counter_read_positive(&mp->m_sb.sb_icount);
+
 	spin_lock(&mp->m_sb_lock);
 	cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
 	cnt->freertx = mp->m_sb.sb_frextents;
 	cnt->freeino = mp->m_sb.sb_ifree;
-	cnt->allocino = mp->m_sb.sb_icount;
 	spin_unlock(&mp->m_sb_lock);
 	return 0;
 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6015f54..df5ec55 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1127,13 +1127,13 @@ xfs_mod_incore_sb_unlocked(
 	 */
 	switch (field) {
 	case XFS_SBS_ICOUNT:
-		lcounter = (long long)mp->m_sb.sb_icount;
-		lcounter += delta;
-		if (lcounter < 0) {
+		/* deltas are +/-64, hence the large batch size of 128. */
+		__percpu_counter_add(&mp->m_sb.sb_icount, delta, 128);
+		if (percpu_counter_compare(&mp->m_sb.sb_icount, 0) < 0) {
 			ASSERT(0);
+			percpu_counter_add(&mp->m_sb.sb_icount, -delta);
 			return -EINVAL;
 		}
-		mp->m_sb.sb_icount = lcounter;
 		return 0;
 	case XFS_SBS_IFREE:
 		lcounter = (long long)mp->m_sb.sb_ifree;
@@ -1288,8 +1288,11 @@ xfs_mod_incore_sb(
 	int			status;
 
 #ifdef HAVE_PERCPU_SB
-	ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
+	ASSERT(field < XFS_SBS_IFREE || field > XFS_SBS_FDBLOCKS);
 #endif
+	if (field == XFS_SBS_ICOUNT)
+		return xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+
 	spin_lock(&mp->m_sb_lock);
 	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
 	spin_unlock(&mp->m_sb_lock);
@@ -1492,7 +1495,6 @@ xfs_icsb_cpu_notify(
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		xfs_icsb_lock(mp);
-		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
 		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
 		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
 		xfs_icsb_unlock(mp);
@@ -1504,17 +1506,14 @@ xfs_icsb_cpu_notify(
 		 * re-enable the counters. */
 		xfs_icsb_lock(mp);
 		spin_lock(&mp->m_sb_lock);
-		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
 		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
 		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
 
-		mp->m_sb.sb_icount += cntp->icsb_icount;
 		mp->m_sb.sb_ifree += cntp->icsb_ifree;
 		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
 
 		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
 
-		xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
 		xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
 		xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
 		spin_unlock(&mp->m_sb_lock);
@@ -1533,9 +1532,15 @@ xfs_icsb_init_counters(
 	xfs_icsb_cnts_t *cntp;
 	int		i;
 
+	i = percpu_counter_init(&mp->m_sb.sb_icount, 0, GFP_KERNEL);
+	if (i)
+		return -ENOMEM;
+
 	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
-	if (mp->m_sb_cnts == NULL)
+	if (!mp->m_sb_cnts) {
+		percpu_counter_destroy(&mp->m_sb.sb_icount);
 		return -ENOMEM;
+	}
 
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
@@ -1569,7 +1574,6 @@ xfs_icsb_reinit_counters(
 	 * initial balance kicks us off correctly
 	 */
 	mp->m_icsb_counters = -1;
-	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
 	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
 	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
 	xfs_icsb_unlock(mp);
@@ -1583,6 +1587,9 @@ xfs_icsb_destroy_counters(
 		unregister_hotcpu_notifier(&mp->m_icsb_notifier);
 		free_percpu(mp->m_sb_cnts);
 	}
+
+	percpu_counter_destroy(&mp->m_sb.sb_icount);
+
 	mutex_destroy(&mp->m_icsb_mutex);
 }
 
@@ -1645,7 +1652,6 @@ xfs_icsb_count(
 
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		cnt->icsb_icount += cntp->icsb_icount;
 		cnt->icsb_ifree += cntp->icsb_ifree;
 		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
 	}
@@ -1659,7 +1665,7 @@ xfs_icsb_counter_disabled(
 	xfs_mount_t	*mp,
 	xfs_sb_field_t	field)
 {
-	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+	ASSERT((field >= XFS_SBS_IFREE) && (field <= XFS_SBS_FDBLOCKS));
 	return test_bit(field, &mp->m_icsb_counters);
 }
 
@@ -1670,7 +1676,7 @@ xfs_icsb_disable_counter(
 {
 	xfs_icsb_cnts_t	cnt;
 
-	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+	ASSERT((field >= XFS_SBS_IFREE) && (field <= XFS_SBS_FDBLOCKS));
 
 	/*
 	 * If we are already disabled, then there is nothing to do
@@ -1689,9 +1695,6 @@ xfs_icsb_disable_counter(
 
 		xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
 		switch(field) {
-		case XFS_SBS_ICOUNT:
-			mp->m_sb.sb_icount = cnt.icsb_icount;
-			break;
 		case XFS_SBS_IFREE:
 			mp->m_sb.sb_ifree = cnt.icsb_ifree;
 			break;
@@ -1716,15 +1719,12 @@ xfs_icsb_enable_counter(
 	xfs_icsb_cnts_t	*cntp;
 	int		i;
 
-	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+	ASSERT((field >= XFS_SBS_IFREE) && (field <= XFS_SBS_FDBLOCKS));
 
 	xfs_icsb_lock_all_counters(mp);
 	for_each_online_cpu(i) {
 		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
 		switch (field) {
-		case XFS_SBS_ICOUNT:
-			cntp->icsb_icount = count + resid;
-			break;
 		case XFS_SBS_IFREE:
 			cntp->icsb_ifree = count + resid;
 			break;
@@ -1750,8 +1750,6 @@ xfs_icsb_sync_counters_locked(
 
 	xfs_icsb_count(mp, &cnt, flags);
 
-	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
-		mp->m_sb.sb_icount = cnt.icsb_icount;
 	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
 		mp->m_sb.sb_ifree = cnt.icsb_ifree;
 	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
@@ -1805,12 +1803,6 @@ xfs_icsb_balance_counter_locked(
 
 	/* update counters  - first CPU gets residual*/
 	switch (field) {
-	case XFS_SBS_ICOUNT:
-		count = mp->m_sb.sb_icount;
-		resid = do_div(count, weight);
-		if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-			return;
-		break;
 	case XFS_SBS_IFREE:
 		count = mp->m_sb.sb_ifree;
 		resid = do_div(count, weight);
@@ -1871,14 +1863,6 @@ again:
 	}
 
 	switch (field) {
-	case XFS_SBS_ICOUNT:
-		lcounter = icsbp->icsb_icount;
-		lcounter += delta;
-		if (unlikely(lcounter < 0))
-			goto balance_counter;
-		icsbp->icsb_icount = lcounter;
-		break;
-
 	case XFS_SBS_IFREE:
 		lcounter = icsbp->icsb_ifree;
 		lcounter += delta;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9d62134..9499a8f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -41,7 +41,6 @@ struct xfs_da_geometry;
 typedef struct xfs_icsb_cnts {
 	uint64_t	icsb_fdblocks;
 	uint64_t	icsb_ifree;
-	uint64_t	icsb_icount;
 	unsigned long	icsb_flags;
 } xfs_icsb_cnts_t;
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 7583044..408e2fe 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1087,6 +1087,7 @@ xfs_fs_statfs(
 	struct xfs_sb		*sbp = &mp->m_sb;
 	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
 	__uint64_t		fakeinos, id;
+	__uint64_t		sb_icount;
 	xfs_extlen_t		lsize;
 	__int64_t		ffree;
 
@@ -1098,6 +1099,7 @@ xfs_fs_statfs(
 	statp->f_fsid.val[1] = (u32)(id >> 32);
 
 	xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+	sb_icount = percpu_counter_sum(&sbp->sb_icount);
 
 	spin_lock(&mp->m_sb_lock);
 	statp->f_bsize = sbp->sb_blocksize;
@@ -1106,15 +1108,15 @@ xfs_fs_statfs(
 	statp->f_bfree = statp->f_bavail =
 				sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
 	fakeinos = statp->f_bfree << sbp->sb_inopblog;
-	statp->f_files =
-	    MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+	statp->f_files = MIN(sb_icount + fakeinos,
+			     (__uint64_t)XFS_MAXINUMBER);
 	if (mp->m_maxicount)
 		statp->f_files = min_t(typeof(statp->f_files),
 					statp->f_files,
 					mp->m_maxicount);
 
 	/* make sure statp->f_ffree does not underflow */
-	ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+	ffree = statp->f_files - (sb_icount - sbp->sb_ifree);
 	statp->f_ffree = max_t(__int64_t, ffree, 0);
 
 	spin_unlock(&mp->m_sb_lock);
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index a02236b..fa5603c 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -96,7 +96,9 @@ struct xfs_sb {
 	__uint8_t	sb_inprogress;	/* mkfs is in progress, don't mount */
 	__uint8_t	sb_imax_pct;	/* max % of fs for inode space */
 					/* statistics */
-	__uint64_t	sb_icount;	/* allocated inodes */
+
+	struct percpu_counter	sb_icount;	/* allocated inodes */
+
 	__uint64_t	sb_ifree;	/* free inodes */
 	__uint64_t	sb_fdblocks;	/* free data blocks */
 	__uint64_t	sb_frextents;	/* free realtime extents */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index eb90cd5..d78b0ae 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -554,8 +554,7 @@ xfs_trans_unreserve_and_mod_sb(
 	}
 
 	if (idelta) {
-		error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
-						 idelta, rsvd);
+		error = xfs_mod_incore_sb(mp, XFS_SBS_ICOUNT, idelta, rsvd);
 		if (error)
 			goto out_undo_fdblocks;
 	}
@@ -634,7 +633,7 @@ out_undo_ifreecount:
 		xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
 out_undo_icount:
 	if (idelta)
-		xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
+		xfs_mod_incore_sb(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
 out_undo_fdblocks:
 	if (blkdelta)
 		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
-- 
2.0.0
