On Thu, Feb 05, 2015 at 07:54:03AM +1100, Dave Chinner wrote: > XFS has hand-rolled per-cpu counters for the superblock since before > there was any generic implementation. There are some warts around > the use of them for the inode counter as the hand-rolled counter is > designed to be accurate at zero, but has no specific accuracy at > any other value. This design causes problems for the maximum inode > count threshold enforcement, as there is no trigger that balances > the counters as they get close to the maximum threshold. > > Instead of designing new triggers for balancing, just replace the > hand-rolled per-cpu counter with a generic counter. This enables us > to update the counter through the normal superblock modification > functions, but rather than do that we add an xfs_mod_icount() helper > function (from Christoph Hellwig) and keep the percpu counter > outside the superblock in the struct xfs_mount. > > This means we still need to initialise the per-cpu counter > specifically when we read the superblock, and vice versa when we > log/write it, but it does mean that we don't need to change any > other code. 
> > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> > --- > fs/xfs/libxfs/xfs_ialloc.c | 6 ++-- > fs/xfs/libxfs/xfs_sb.c | 2 ++ > fs/xfs/xfs_fsops.c | 3 +- > fs/xfs/xfs_mount.c | 76 +++++++++++++++++++++------------------------- > fs/xfs/xfs_mount.h | 7 +++-- > fs/xfs/xfs_super.c | 7 +++-- > fs/xfs/xfs_trans.c | 5 ++- > 7 files changed, 54 insertions(+), 52 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c > index 116ef1d..5b4ba9f 100644 > --- a/fs/xfs/libxfs/xfs_ialloc.c > +++ b/fs/xfs/libxfs/xfs_ialloc.c > @@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc( > */ > newlen = args.mp->m_ialloc_inos; > if (args.mp->m_maxicount && > - args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) > + percpu_counter_read(&args.mp->m_icount) + newlen > > + args.mp->m_maxicount) > return -ENOSPC; > args.minlen = args.maxlen = args.mp->m_ialloc_blks; > /* > @@ -1340,7 +1341,8 @@ xfs_dialloc( > * inode. > */ > if (mp->m_maxicount && > - mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { > + percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos > > + mp->m_maxicount) { > noroom = 1; > okalloc = 0; > } > diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c > index b0a5fe9..017cb2f 100644 > --- a/fs/xfs/libxfs/xfs_sb.c > +++ b/fs/xfs/libxfs/xfs_sb.c > @@ -771,6 +771,8 @@ xfs_log_sb( > struct xfs_mount *mp = tp->t_mountp; > struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); > > + mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); > + > xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); > xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); > xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); > diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c > index f711452..e1470f2 100644 > --- a/fs/xfs/xfs_fsops.c > +++ b/fs/xfs/xfs_fsops.c > @@ -631,11 +631,12 @@ xfs_fs_counts( > xfs_fsop_counts_t *cnt) > { > xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); > + cnt->allocino = percpu_counter_read_positive(&mp->m_icount); > + > 
spin_lock(&mp->m_sb_lock); > cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); > cnt->freertx = mp->m_sb.sb_frextents; > cnt->freeino = mp->m_sb.sb_ifree; > - cnt->allocino = mp->m_sb.sb_icount; > spin_unlock(&mp->m_sb_lock); > return 0; > } > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c > index 4fa80e6..702ea6a 100644 > --- a/fs/xfs/xfs_mount.c > +++ b/fs/xfs/xfs_mount.c > @@ -1099,6 +1099,21 @@ xfs_log_sbcount(xfs_mount_t *mp) > return xfs_sync_sb(mp, true); > } > > +int > +xfs_mod_icount( > + struct xfs_mount *mp, > + int64_t delta) > +{ > + /* deltas are +/-64, hence the large batch size of 128. */ > + __percpu_counter_add(&mp->m_icount, delta, 128); > + if (percpu_counter_compare(&mp->m_icount, 0) < 0) { > + ASSERT(0); > + percpu_counter_add(&mp->m_icount, -delta); > + return -EINVAL; > + } > + return 0; > +} > + > /* > * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply > * a delta to a specified field in the in-core superblock. Simply > @@ -1127,14 +1142,8 @@ xfs_mod_incore_sb_unlocked( > */ > switch (field) { > case XFS_SBS_ICOUNT: > - lcounter = (long long)mp->m_sb.sb_icount; > - lcounter += delta; > - if (lcounter < 0) { > - ASSERT(0); > - return -EINVAL; > - } > - mp->m_sb.sb_icount = lcounter; > - return 0; > + ASSERT(0); > + return -ENOSPC; > case XFS_SBS_IFREE: > lcounter = (long long)mp->m_sb.sb_ifree; > lcounter += delta; > @@ -1288,8 +1297,9 @@ xfs_mod_incore_sb( > int status; > > #ifdef HAVE_PERCPU_SB > - ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); > + ASSERT(field < XFS_SBS_IFREE || field > XFS_SBS_FDBLOCKS); > #endif Not really sure why the above changes, since XFS_SBS_IFREE > XFS_SBS_ICOUNT. This goes away, so it doesn't matter. 
:) Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx> > + > spin_lock(&mp->m_sb_lock); > status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); > spin_unlock(&mp->m_sb_lock); > @@ -1492,7 +1502,6 @@ xfs_icsb_cpu_notify( > case CPU_ONLINE: > case CPU_ONLINE_FROZEN: > xfs_icsb_lock(mp); > - xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); > xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); > xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); > xfs_icsb_unlock(mp); > @@ -1504,17 +1513,14 @@ xfs_icsb_cpu_notify( > * re-enable the counters. */ > xfs_icsb_lock(mp); > spin_lock(&mp->m_sb_lock); > - xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); > xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); > xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); > > - mp->m_sb.sb_icount += cntp->icsb_icount; > mp->m_sb.sb_ifree += cntp->icsb_ifree; > mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks; > > memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); > > - xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0); > xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0); > xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0); > spin_unlock(&mp->m_sb_lock); > @@ -1531,11 +1537,18 @@ xfs_icsb_init_counters( > xfs_mount_t *mp) > { > xfs_icsb_cnts_t *cntp; > + int error; > int i; > > + error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); > + if (error) > + return error; > + > mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t); > - if (mp->m_sb_cnts == NULL) > + if (!mp->m_sb_cnts) { > + percpu_counter_destroy(&mp->m_icount); > return -ENOMEM; > + } > > for_each_online_cpu(i) { > cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); > @@ -1563,13 +1576,14 @@ void > xfs_icsb_reinit_counters( > xfs_mount_t *mp) > { > + percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); > + > xfs_icsb_lock(mp); > /* > * start with all counters disabled so that the > * initial balance kicks us off correctly > */ > mp->m_icsb_counters = -1; > - xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); > xfs_icsb_balance_counter(mp, 
XFS_SBS_IFREE, 0); > xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); > xfs_icsb_unlock(mp); > @@ -1583,6 +1597,9 @@ xfs_icsb_destroy_counters( > unregister_hotcpu_notifier(&mp->m_icsb_notifier); > free_percpu(mp->m_sb_cnts); > } > + > + percpu_counter_destroy(&mp->m_icount); > + > mutex_destroy(&mp->m_icsb_mutex); > } > > @@ -1645,7 +1662,6 @@ xfs_icsb_count( > > for_each_online_cpu(i) { > cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); > - cnt->icsb_icount += cntp->icsb_icount; > cnt->icsb_ifree += cntp->icsb_ifree; > cnt->icsb_fdblocks += cntp->icsb_fdblocks; > } > @@ -1659,7 +1675,7 @@ xfs_icsb_counter_disabled( > xfs_mount_t *mp, > xfs_sb_field_t field) > { > - ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); > + ASSERT((field >= XFS_SBS_IFREE) && (field <= XFS_SBS_FDBLOCKS)); > return test_bit(field, &mp->m_icsb_counters); > } > > @@ -1670,7 +1686,7 @@ xfs_icsb_disable_counter( > { > xfs_icsb_cnts_t cnt; > > - ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); > + ASSERT((field >= XFS_SBS_IFREE) && (field <= XFS_SBS_FDBLOCKS)); > > /* > * If we are already disabled, then there is nothing to do > @@ -1689,9 +1705,6 @@ xfs_icsb_disable_counter( > > xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT); > switch(field) { > - case XFS_SBS_ICOUNT: > - mp->m_sb.sb_icount = cnt.icsb_icount; > - break; > case XFS_SBS_IFREE: > mp->m_sb.sb_ifree = cnt.icsb_ifree; > break; > @@ -1716,15 +1729,12 @@ xfs_icsb_enable_counter( > xfs_icsb_cnts_t *cntp; > int i; > > - ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); > + ASSERT((field >= XFS_SBS_IFREE) && (field <= XFS_SBS_FDBLOCKS)); > > xfs_icsb_lock_all_counters(mp); > for_each_online_cpu(i) { > cntp = per_cpu_ptr(mp->m_sb_cnts, i); > switch (field) { > - case XFS_SBS_ICOUNT: > - cntp->icsb_icount = count + resid; > - break; > case XFS_SBS_IFREE: > cntp->icsb_ifree = count + resid; > break; > @@ -1750,8 +1760,6 @@ xfs_icsb_sync_counters_locked( > > xfs_icsb_count(mp, &cnt, 
flags); > > - if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) > - mp->m_sb.sb_icount = cnt.icsb_icount; > if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) > mp->m_sb.sb_ifree = cnt.icsb_ifree; > if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) > @@ -1805,12 +1813,6 @@ xfs_icsb_balance_counter_locked( > > /* update counters - first CPU gets residual*/ > switch (field) { > - case XFS_SBS_ICOUNT: > - count = mp->m_sb.sb_icount; > - resid = do_div(count, weight); > - if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) > - return; > - break; > case XFS_SBS_IFREE: > count = mp->m_sb.sb_ifree; > resid = do_div(count, weight); > @@ -1871,14 +1873,6 @@ again: > } > > switch (field) { > - case XFS_SBS_ICOUNT: > - lcounter = icsbp->icsb_icount; > - lcounter += delta; > - if (unlikely(lcounter < 0)) > - goto balance_counter; > - icsbp->icsb_icount = lcounter; > - break; > - > case XFS_SBS_IFREE: > lcounter = icsbp->icsb_ifree; > lcounter += delta; > diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h > index a5b2ff8..457c6e3 100644 > --- a/fs/xfs/xfs_mount.h > +++ b/fs/xfs/xfs_mount.h > @@ -41,7 +41,6 @@ struct xfs_da_geometry; > typedef struct xfs_icsb_cnts { > uint64_t icsb_fdblocks; > uint64_t icsb_ifree; > - uint64_t icsb_icount; > unsigned long icsb_flags; > } xfs_icsb_cnts_t; > > @@ -81,8 +80,11 @@ typedef struct xfs_mount { > struct super_block *m_super; > xfs_tid_t m_tid; /* next unused tid for fs */ > struct xfs_ail *m_ail; /* fs active log item list */ > - xfs_sb_t m_sb; /* copy of fs superblock */ > + > + struct xfs_sb m_sb; /* copy of fs superblock */ > spinlock_t m_sb_lock; /* sb counter lock */ > + struct percpu_counter m_icount; /* allocated inodes counter */ > + > struct xfs_buf *m_sb_bp; /* buffer for superblock */ > char *m_fsname; /* filesystem name */ > int m_fsname_len; /* strlen of fs name */ > @@ -377,6 +379,7 @@ extern void xfs_unmountfs(xfs_mount_t *); > extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); > extern int 
xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, > uint, int); > +extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta); > extern int xfs_mount_log_sb(xfs_mount_t *); > extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); > extern int xfs_readsb(xfs_mount_t *, int); > diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c > index 40d2ac5..87e169f 100644 > --- a/fs/xfs/xfs_super.c > +++ b/fs/xfs/xfs_super.c > @@ -1087,6 +1087,7 @@ xfs_fs_statfs( > xfs_sb_t *sbp = &mp->m_sb; > struct xfs_inode *ip = XFS_I(dentry->d_inode); > __uint64_t fakeinos, id; > + __uint64_t icount; > xfs_extlen_t lsize; > __int64_t ffree; > > @@ -1098,6 +1099,7 @@ xfs_fs_statfs( > statp->f_fsid.val[1] = (u32)(id >> 32); > > xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); > + icount = percpu_counter_sum(&mp->m_icount); > > spin_lock(&mp->m_sb_lock); > statp->f_bsize = sbp->sb_blocksize; > @@ -1106,15 +1108,14 @@ xfs_fs_statfs( > statp->f_bfree = statp->f_bavail = > sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); > fakeinos = statp->f_bfree << sbp->sb_inopblog; > - statp->f_files = > - MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); > + statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); > if (mp->m_maxicount) > statp->f_files = min_t(typeof(statp->f_files), > statp->f_files, > mp->m_maxicount); > > /* make sure statp->f_ffree does not underflow */ > - ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); > + ffree = statp->f_files - (icount - sbp->sb_ifree); > statp->f_ffree = max_t(__int64_t, ffree, 0); > > spin_unlock(&mp->m_sb_lock); > diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c > index eb90cd5..9bc742b 100644 > --- a/fs/xfs/xfs_trans.c > +++ b/fs/xfs/xfs_trans.c > @@ -554,8 +554,7 @@ xfs_trans_unreserve_and_mod_sb( > } > > if (idelta) { > - error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, > - idelta, rsvd); > + error = xfs_mod_icount(mp, idelta); > if (error) > goto out_undo_fdblocks; > } > @@ -634,7 +633,7 @@ out_undo_ifreecount: > 
xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); > out_undo_icount: > if (idelta) > - xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); > + xfs_mod_icount(mp, -idelta); > out_undo_fdblocks: > if (blkdelta) > xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); > -- > 2.0.0 > > _______________________________________________ > xfs mailing list > xfs@xxxxxxxxxxx > http://oss.sgi.com/mailman/listinfo/xfs _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs