On Wed, Dec 11, 2024 at 09:54:34AM +0100, Christoph Hellwig wrote: > The main handling of the incore per-cpu freespace counters is already > handled in xfs_mod_freecounter for both the block and RT extent cases, > but the actual counter is passed in and special-cased. > > Replace both the percpu counters and the resblks counters with arrays, > so that reserved RT extents can be supported, which will be > needed for garbage collection on zoned devices. > > Use helpers to access the freespace counters everywhere instead of > poking through the abstraction by using the percpu_counter helpers > directly. This also switches the flooring of the frextents counter > to 0 in statfs for the rtinherit case to a manual max() call to match > the handling of the fdblocks counter for normal file systems. > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > fs/xfs/libxfs/xfs_ialloc.c | 2 +- > fs/xfs/libxfs/xfs_metafile.c | 2 +- > fs/xfs/libxfs/xfs_sb.c | 7 +-- > fs/xfs/scrub/fscounters.c | 13 +++--- > fs/xfs/scrub/fscounters_repair.c | 4 +- > fs/xfs/scrub/newbt.c | 2 +- > fs/xfs/xfs_fsops.c | 27 ++++++------ > fs/xfs/xfs_fsops.h | 3 +- > fs/xfs/xfs_icache.c | 4 +- > fs/xfs/xfs_ioctl.c | 12 +++--- > fs/xfs/xfs_iomap.c | 9 ++-- > fs/xfs/xfs_mount.c | 58 ++++++++++++++++--------- > fs/xfs/xfs_mount.h | 65 +++++++++++++++++++--------- > fs/xfs/xfs_rtalloc.c | 2 +- > fs/xfs/xfs_super.c | 74 ++++++++++++++++++-------------- > fs/xfs/xfs_trace.h | 2 +- > 16 files changed, 171 insertions(+), 115 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c > index f3a840a425f5..57513ba19d6a 100644 > --- a/fs/xfs/libxfs/xfs_ialloc.c > +++ b/fs/xfs/libxfs/xfs_ialloc.c > @@ -1927,7 +1927,7 @@ xfs_dialloc( > * that we can immediately allocate, but then we allow allocation on the > * second pass if we fail to find an AG with free inodes in it. > */ > - if (percpu_counter_read_positive(&mp->m_fdblocks) < > + if (xfs_estimate_freecounter(mp, XC_FREE_BLOCKS) < > mp->m_low_space[XFS_LOWSP_1_PCNT]) { > ok_alloc = false; > low_space = true; > diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c > index e151663cc9ef..c84820f5bdc6 100644 > --- a/fs/xfs/libxfs/xfs_metafile.c > +++ b/fs/xfs/libxfs/xfs_metafile.c > @@ -77,7 +77,7 @@ xfs_metafile_resv_can_cover( > * There aren't enough blocks left in the inode's reservation, but it > * isn't critical unless there also isn't enough free space. > */ > - return __percpu_counter_compare(&ip->i_mount->m_fdblocks, > + return xfs_compare_freecounter(ip->i_mount, XC_FREE_BLOCKS, > rhs - ip->i_delayed_blks, 2048) >= 0; > } > > diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c > index 3dc5f5dba162..090f133f4da3 100644 > --- a/fs/xfs/libxfs/xfs_sb.c > +++ b/fs/xfs/libxfs/xfs_sb.c > @@ -1266,7 +1266,7 @@ xfs_log_sb( > percpu_counter_sum_positive(&mp->m_ifree), > mp->m_sb.sb_icount); > mp->m_sb.sb_fdblocks = > - percpu_counter_sum_positive(&mp->m_fdblocks); > + max(0LL, xfs_sum_freecounter(mp, XC_FREE_BLOCKS)); > } > > /* > @@ -1275,9 +1275,10 @@ xfs_log_sb( > * we handle nearly-lockless reservations, so we must use the _positive > * variant here to avoid writing out nonsense frextents. > */ > - if (xfs_has_rtgroups(mp)) > + if (xfs_has_rtgroups(mp)) { > mp->m_sb.sb_frextents = > - percpu_counter_sum_positive(&mp->m_frextents); > + xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS); Curious. xfs_sum_freecounter returns percpu_counter_sum, not its _positive variant. This seems like a bug? Or at least an omitted max(0LL, ...) call?
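Something like this is what I had in mind, mirroring the max(0LL, ...) clamp the fdblocks branch above already uses -- untested sketch, just to illustrate:

	if (xfs_has_rtgroups(mp)) {
		mp->m_sb.sb_frextents = max(0LL,
				xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	}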
> + } > > xfs_sb_to_disk(bp->b_addr, &mp->m_sb); > xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); > diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c > index ca23cf4db6c5..732658a62a2d 100644 > --- a/fs/xfs/scrub/fscounters.c > +++ b/fs/xfs/scrub/fscounters.c > @@ -350,7 +350,7 @@ xchk_fscount_aggregate_agcounts( > * The global incore space reservation is taken from the incore > * counters, so leave that out of the computation. > */ > - fsc->fdblocks -= mp->m_resblks_avail; > + fsc->fdblocks -= mp->m_resblks[XC_FREE_BLOCKS].avail; > > /* > * Delayed allocation reservations are taken out of the incore counters > @@ -513,8 +513,8 @@ xchk_fscounters( > /* Snapshot the percpu counters. */ > icount = percpu_counter_sum(&mp->m_icount); > ifree = percpu_counter_sum(&mp->m_ifree); > - fdblocks = percpu_counter_sum(&mp->m_fdblocks); > - frextents = percpu_counter_sum(&mp->m_frextents); > + fdblocks = xfs_sum_freecounter(mp, XC_FREE_BLOCKS); > + frextents = xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS); > > /* No negative values, please! */ > if (icount < 0 || ifree < 0) > @@ -589,15 +589,16 @@ xchk_fscounters( > try_again = true; > } > > - if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks, > - fsc->fdblocks)) { > + if (!xchk_fscount_within_range(sc, fdblocks, > + &mp->m_free[XC_FREE_BLOCKS], fsc->fdblocks)) { > if (fsc->frozen) > xchk_set_corrupt(sc); > else > try_again = true; > } > > - if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents, > + if (!xchk_fscount_within_range(sc, frextents, > + &mp->m_free[XC_FREE_RTEXTENTS], > fsc->frextents - fsc->frextents_delayed)) { > if (fsc->frozen) > xchk_set_corrupt(sc); > diff --git a/fs/xfs/scrub/fscounters_repair.c b/fs/xfs/scrub/fscounters_repair.c > index cda13447a373..8fb0db78489e 100644 > --- a/fs/xfs/scrub/fscounters_repair.c > +++ b/fs/xfs/scrub/fscounters_repair.c > @@ -64,7 +64,7 @@ xrep_fscounters( > > percpu_counter_set(&mp->m_icount, fsc->icount); > percpu_counter_set(&mp->m_ifree, fsc->ifree); > - percpu_counter_set(&mp->m_fdblocks, fsc->fdblocks); > + xfs_set_freecounter(mp, XC_FREE_BLOCKS, fsc->fdblocks); > > /* > * Online repair is only supported on v5 file systems, which require > @@ -74,7 +74,7 @@ xrep_fscounters( > * track of the delalloc reservations separately, as they are are > * subtracted from m_frextents, but not included in sb_frextents. > */ > - percpu_counter_set(&mp->m_frextents, > + xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, > fsc->frextents - fsc->frextents_delayed); > if (!xfs_has_rtgroups(mp)) > mp->m_sb.sb_frextents = fsc->frextents; > diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c > index ac38f5843090..3e46b04f427f 100644 > --- a/fs/xfs/scrub/newbt.c > +++ b/fs/xfs/scrub/newbt.c > @@ -62,7 +62,7 @@ xrep_newbt_estimate_slack( > free = sc->sa.pag->pagf_freeblks; > sz = xfs_ag_block_count(sc->mp, pag_agno(sc->sa.pag)); > } else { > - free = percpu_counter_sum(&sc->mp->m_fdblocks); > + free = xfs_sum_freecounter(sc->mp, XC_FREE_BLOCKS); > sz = sc->mp->m_sb.sb_dblocks; > } > > diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c > index 455298503d01..bb2e31e338b8 100644 > --- a/fs/xfs/xfs_fsops.c > +++ b/fs/xfs/xfs_fsops.c > @@ -366,6 +366,7 @@ xfs_growfs_log( > int > xfs_reserve_blocks( > struct xfs_mount *mp, > + enum xfs_free_counter ctr, > uint64_t request) > { > int64_t lcounter, delta; > @@ -373,6 +374,8 @@ xfs_reserve_blocks( > int64_t free; > int error = 0; > > + ASSERT(ctr < XC_FREE_NR); > + > /* > * With per-cpu counters, this becomes an interesting problem. 
we need > * to work out if we are freeing or allocation blocks first, then we can > @@ -391,16 +394,16 @@ xfs_reserve_blocks( > * counters directly since we shouldn't have any problems unreserving > * space. > */ > - if (mp->m_resblks > request) { > - lcounter = mp->m_resblks_avail - request; > + if (mp->m_resblks[ctr].total > request) { > + lcounter = mp->m_resblks[ctr].avail - request; > if (lcounter > 0) { /* release unused blocks */ > fdblks_delta = lcounter; > - mp->m_resblks_avail -= lcounter; > + mp->m_resblks[ctr].avail -= lcounter; > } > - mp->m_resblks = request; > + mp->m_resblks[ctr].total = request; > if (fdblks_delta) { > spin_unlock(&mp->m_sb_lock); > - xfs_add_fdblocks(mp, fdblks_delta); > + xfs_add_freecounter(mp, ctr, fdblks_delta); > spin_lock(&mp->m_sb_lock); > } > > @@ -409,7 +412,7 @@ xfs_reserve_blocks( > > /* > * If the request is larger than the current reservation, reserve the > - * blocks before we update the reserve counters. Sample m_fdblocks and > + * blocks before we update the reserve counters. Sample m_free and > * perform a partial reservation if the request exceeds free space. > * > * The code below estimates how many blocks it can request from > @@ -419,10 +422,10 @@ xfs_reserve_blocks( > * space to fill it because mod_fdblocks will refill an undersized > * reserve when it can. > */ > - free = percpu_counter_sum(&mp->m_fdblocks) - > - xfs_fdblocks_unavailable(mp); > - delta = request - mp->m_resblks; > - mp->m_resblks = request; > + free = xfs_sum_freecounter(mp, ctr) - > + xfs_freecounter_unavailable(mp, ctr); > + delta = request - mp->m_resblks[ctr].total; > + mp->m_resblks[ctr].total = request; > if (delta > 0 && free > 0) { > /* > * We'll either succeed in getting space from the free block > @@ -436,9 +439,9 @@ xfs_reserve_blocks( > */ > fdblks_delta = min(free, delta); > spin_unlock(&mp->m_sb_lock); > - error = xfs_dec_fdblocks(mp, fdblks_delta, 0); > + error = xfs_dec_freecounter(mp, ctr, fdblks_delta, 0); > if (!error) > - xfs_add_fdblocks(mp, fdblks_delta); > + xfs_add_freecounter(mp, ctr, fdblks_delta); > spin_lock(&mp->m_sb_lock); > } > out: > diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h > index 3e2f73bcf831..9d23c361ef56 100644 > --- a/fs/xfs/xfs_fsops.h > +++ b/fs/xfs/xfs_fsops.h > @@ -8,7 +8,8 @@ > > int xfs_growfs_data(struct xfs_mount *mp, struct xfs_growfs_data *in); > int xfs_growfs_log(struct xfs_mount *mp, struct xfs_growfs_log *in); > -int xfs_reserve_blocks(struct xfs_mount *mp, uint64_t request); > +int xfs_reserve_blocks(struct xfs_mount *mp, enum xfs_free_counter cnt, > + uint64_t request); > int xfs_fs_goingdown(struct xfs_mount *mp, uint32_t inflags); > > int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp); > diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c > index 7b6c026d01a1..c9ded501e89b 100644 > --- a/fs/xfs/xfs_icache.c > +++ b/fs/xfs/xfs_icache.c > @@ -2076,7 +2076,7 @@ xfs_inodegc_want_queue_rt_file( > if (!XFS_IS_REALTIME_INODE(ip)) > return false; > > - if (__percpu_counter_compare(&mp->m_frextents, > + if (xfs_compare_freecounter(mp, XC_FREE_RTEXTENTS, > mp->m_low_rtexts[XFS_LOWSP_5_PCNT], > XFS_FDBLOCKS_BATCH) < 0) > return true; > @@ -2104,7 +2104,7 @@ xfs_inodegc_want_queue_work( > if (items > mp->m_ino_geo.inodes_per_cluster) > return true; > > - if (__percpu_counter_compare(&mp->m_fdblocks, > + if (xfs_compare_freecounter(mp, XC_FREE_BLOCKS, > mp->m_low_space[XFS_LOWSP_5_PCNT], > XFS_FDBLOCKS_BATCH) < 0) > return true; > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > index 
de8ba5345e17..d3cf62d81f0d 100644 > --- a/fs/xfs/xfs_ioctl.c > +++ b/fs/xfs/xfs_ioctl.c > @@ -1131,15 +1131,15 @@ xfs_ioctl_getset_resblocks( > error = mnt_want_write_file(filp); > if (error) > return error; > - error = xfs_reserve_blocks(mp, fsop.resblks); > + error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, fsop.resblks); > mnt_drop_write_file(filp); > if (error) > return error; > } > > spin_lock(&mp->m_sb_lock); > - fsop.resblks = mp->m_resblks; > - fsop.resblks_avail = mp->m_resblks_avail; > + fsop.resblks = mp->m_resblks[XC_FREE_BLOCKS].total; > + fsop.resblks_avail = mp->m_resblks[XC_FREE_BLOCKS].avail; > spin_unlock(&mp->m_sb_lock); > > if (copy_to_user(arg, &fsop, sizeof(fsop))) > @@ -1155,9 +1155,9 @@ xfs_ioctl_fs_counts( > struct xfs_fsop_counts out = { > .allocino = percpu_counter_read_positive(&mp->m_icount), > .freeino = percpu_counter_read_positive(&mp->m_ifree), > - .freedata = percpu_counter_read_positive(&mp->m_fdblocks) - > - xfs_fdblocks_unavailable(mp), > - .freertx = percpu_counter_read_positive(&mp->m_frextents), > + .freedata = xfs_estimate_freecounter(mp, XC_FREE_BLOCKS) - > + xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS), > + .freertx = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS), > }; > > if (copy_to_user(uarg, &out, sizeof(out))) > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index b3783d7b8ebe..f3f4b5c328c3 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -432,13 +432,14 @@ xfs_quota_calc_throttle( > > static int64_t > xfs_iomap_freesp( > - struct percpu_counter *counter, > + struct xfs_mount *mp, > + unsigned int idx, > uint64_t low_space[XFS_LOWSP_MAX], > int *shift) > { > int64_t freesp; > > - freesp = percpu_counter_read_positive(counter); > + freesp = xfs_estimate_freecounter(mp, idx); > if (freesp < low_space[XFS_LOWSP_5_PCNT]) { > *shift = 2; > if (freesp < low_space[XFS_LOWSP_4_PCNT]) > @@ -537,10 +538,10 @@ xfs_iomap_prealloc_size( > > if (unlikely(XFS_IS_REALTIME_INODE(ip))) > freesp = xfs_rtbxlen_to_blen(mp, > - xfs_iomap_freesp(&mp->m_frextents, > + xfs_iomap_freesp(mp, XC_FREE_RTEXTENTS, > mp->m_low_rtexts, &shift)); > else > - freesp = xfs_iomap_freesp(&mp->m_fdblocks, mp->m_low_space, > + freesp = xfs_iomap_freesp(mp, XC_FREE_BLOCKS, mp->m_low_space, > &shift); > > /* > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c > index 66b91b582691..4174035b2ac9 100644 > --- a/fs/xfs/xfs_mount.c > +++ b/fs/xfs/xfs_mount.c > @@ -1058,7 +1058,8 @@ xfs_mountfs( > * we were already there on the last unmount. Warn if this occurs. > */ > if (!xfs_is_readonly(mp)) { > - error = xfs_reserve_blocks(mp, xfs_default_resblks(mp)); > + error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, > + xfs_default_resblks(mp)); > if (error) > xfs_warn(mp, > "Unable to allocate reserve blocks. Continuing without reserve pool."); > @@ -1178,7 +1179,7 @@ xfs_unmountfs( > * we only every apply deltas to the superblock and hence the incore > * value does not matter.... > */ > - error = xfs_reserve_blocks(mp, 0); > + error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, 0); > if (error) > xfs_warn(mp, "Unable to free reserved block pool. " > "Freespace may not be correct on next mount."); > @@ -1225,52 +1226,68 @@ xfs_fs_writable( > return true; > } > > +/* > + * Estimate the amount of free space that is not available to userspace and is > + * not explicitly reserved from the incore fdblocks. 
This includes: > + * > + * - The minimum number of blocks needed to support splitting a bmap btree > + * - The blocks currently in use by the freespace btrees because they record > + * the actual blocks that will fill per-AG metadata space reservations > + */ > +uint64_t > +xfs_freecounter_unavailable( > + struct xfs_mount *mp, > + enum xfs_free_counter ctr) > +{ > + if (ctr == XC_FREE_RTEXTENTS) > + return 0; > + return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks); > +} > + > void > xfs_add_freecounter( > struct xfs_mount *mp, > - struct percpu_counter *counter, > + enum xfs_free_counter ctr, > uint64_t delta) > { > - bool has_resv_pool = (counter == &mp->m_fdblocks); > uint64_t res_used; > > /* > * If the reserve pool is depleted, put blocks back into it first. > * Most of the time the pool is full. > */ > - if (!has_resv_pool || mp->m_resblks == mp->m_resblks_avail) { > - percpu_counter_add(counter, delta); > + if (likely(mp->m_resblks[ctr].total == mp->m_resblks[ctr].avail)) { > + percpu_counter_add(&mp->m_free[ctr], delta); > return; > } > > spin_lock(&mp->m_sb_lock); > - res_used = mp->m_resblks - mp->m_resblks_avail; > + res_used = mp->m_resblks[ctr].total - mp->m_resblks[ctr].avail; > if (res_used > delta) { > - mp->m_resblks_avail += delta; > + mp->m_resblks[ctr].avail += delta; > } else { > delta -= res_used; > - mp->m_resblks_avail = mp->m_resblks; > - percpu_counter_add(counter, delta); > + mp->m_resblks[ctr].avail = mp->m_resblks[ctr].total; > + percpu_counter_add(&mp->m_free[ctr], delta); > } > spin_unlock(&mp->m_sb_lock); > } > > + > +/* Adjust in-core free blocks or RT extents. */ > int > xfs_dec_freecounter( > struct xfs_mount *mp, > - struct percpu_counter *counter, > + enum xfs_free_counter ctr, > uint64_t delta, > bool rsvd) > { > + struct percpu_counter *counter = &mp->m_free[ctr]; > int64_t lcounter; > uint64_t set_aside = 0; > s32 batch; > - bool has_resv_pool; > > - ASSERT(counter == &mp->m_fdblocks || counter == &mp->m_frextents); > - has_resv_pool = (counter == &mp->m_fdblocks); > - if (rsvd) > - ASSERT(has_resv_pool); > + ASSERT(ctr < XC_FREE_NR); > > /* > * Taking blocks away, need to be more accurate the closer we > @@ -1297,8 +1314,7 @@ xfs_dec_freecounter( > * problems (i.e. transaction abort, pagecache discards, etc.) than > * slightly premature -ENOSPC. > */ > - if (has_resv_pool) > - set_aside = xfs_fdblocks_unavailable(mp); > + set_aside = xfs_freecounter_unavailable(mp, ctr); Nit: I think you can get rid of the set_aside = 0; above? 
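i.e. since set_aside is now assigned unconditionally before its only use, I think the declaration can lose the initializer -- untested:

	int64_t			lcounter;
	uint64_t		set_aside;
	s32			batch;
	...
	set_aside = xfs_freecounter_unavailable(mp, ctr);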
> percpu_counter_add_batch(counter, -((int64_t)delta), batch); > if (__percpu_counter_compare(counter, set_aside, > XFS_FDBLOCKS_BATCH) >= 0) { > @@ -1312,12 +1328,12 @@ xfs_dec_freecounter( > */ > spin_lock(&mp->m_sb_lock); > percpu_counter_add(counter, delta); > - if (!has_resv_pool || !rsvd) > + if (!rsvd) > goto fdblocks_enospc; > > - lcounter = (long long)mp->m_resblks_avail - delta; > + lcounter = (long long)mp->m_resblks[ctr].avail - delta; > if (lcounter >= 0) { > - mp->m_resblks_avail = lcounter; > + mp->m_resblks[ctr].avail = lcounter; > spin_unlock(&mp->m_sb_lock); > return 0; > } > diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h > index fbed172d6770..d92bce7bc184 100644 > --- a/fs/xfs/xfs_mount.h > +++ b/fs/xfs/xfs_mount.h > @@ -105,6 +105,12 @@ struct xfs_groups { > uint64_t blkmask; > }; > > +enum xfs_free_counter { > + XC_FREE_BLOCKS, /* free block counter */ > + XC_FREE_RTEXTENTS, /* free rt extent counter */ > + XC_FREE_NR, > +}; > + > /* > * The struct xfsmount layout is optimised to separate read-mostly variables > * from variables that are frequently modified. We put the read-mostly variables > @@ -222,8 +228,7 @@ typedef struct xfs_mount { > spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */ > struct percpu_counter m_icount; /* allocated inodes counter */ > struct percpu_counter m_ifree; /* free inodes counter */ > - struct percpu_counter m_fdblocks; /* free block counter */ > - struct percpu_counter m_frextents; /* free rt extent counter */ > + struct percpu_counter m_free[XC_FREE_NR]; > > /* > * Count of data device blocks reserved for delayed allocations, > @@ -245,9 +250,11 @@ typedef struct xfs_mount { > atomic64_t m_allocbt_blks; > > struct xfs_groups m_groups[XG_TYPE_MAX]; > - uint64_t m_resblks; /* total reserved blocks */ > - uint64_t m_resblks_avail;/* available reserved blocks */ > - uint64_t m_resblks_save; /* reserved blks @ remount,ro */ > + struct { > + uint64_t total; /* total reserved blocks */ > + uint64_t avail; /* available reserved blocks */ > + uint64_t save; /* reserved blks @ remount,ro */ > + } m_resblks[XC_FREE_NR]; > struct delayed_work m_reclaim_work; /* background inode reclaim */ > struct dentry *m_debugfs; /* debugfs parent */ > struct xfs_kobj m_kobj; > @@ -646,45 +653,61 @@ extern void xfs_unmountfs(xfs_mount_t *); > */ > #define XFS_FDBLOCKS_BATCH 1024 > > +uint64_t xfs_freecounter_unavailable(struct xfs_mount *mp, > + enum xfs_free_counter ctr); > + > +static inline s64 xfs_sum_freecounter(struct xfs_mount *mp, > + enum xfs_free_counter ctr) > +{ > + return percpu_counter_sum(&mp->m_free[ctr]); > +} > + > /* > - * Estimate the amount of free space that is not available to userspace and is > - * not explicitly reserved from the incore fdblocks. This includes: > - * > - * - The minimum number of blocks needed to support splitting a bmap btree > - * - The blocks currently in use by the freespace btrees because they record > - * the actual blocks that will fill per-AG metadata space reservations > + * This just provides an estimate without the cpu-local updates; use > + * xfs_sum_freecounter for the exact value.
> */ > -static inline uint64_t > -xfs_fdblocks_unavailable( > - struct xfs_mount *mp) > +static inline s64 xfs_estimate_freecounter(struct xfs_mount *mp, > + enum xfs_free_counter ctr) > +{ > + return percpu_counter_read_positive(&mp->m_free[ctr]); > +} > + > +static inline int xfs_compare_freecounter(struct xfs_mount *mp, > + enum xfs_free_counter ctr, s64 rhs, s32 batch) > +{ > + return __percpu_counter_compare(&mp->m_free[ctr], rhs, batch); > +} > + > +static inline void xfs_set_freecounter(struct xfs_mount *mp, > + enum xfs_free_counter ctr, uint64_t val) > { > - return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks); > + percpu_counter_set(&mp->m_free[ctr], val); > } > > -int xfs_dec_freecounter(struct xfs_mount *mp, struct percpu_counter *counter, > +int xfs_dec_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, > uint64_t delta, bool rsvd); > -void xfs_add_freecounter(struct xfs_mount *mp, struct percpu_counter *counter, > +void xfs_add_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, > uint64_t delta); > > static inline int xfs_dec_fdblocks(struct xfs_mount *mp, uint64_t delta, > bool reserved) > { > - return xfs_dec_freecounter(mp, &mp->m_fdblocks, delta, reserved); > + return xfs_dec_freecounter(mp, XC_FREE_BLOCKS, delta, reserved); > } > > static inline void xfs_add_fdblocks(struct xfs_mount *mp, uint64_t delta) > { > - xfs_add_freecounter(mp, &mp->m_fdblocks, delta); > + xfs_add_freecounter(mp, XC_FREE_BLOCKS, delta); > } > > static inline int xfs_dec_frextents(struct xfs_mount *mp, uint64_t delta) > { > - return xfs_dec_freecounter(mp, &mp->m_frextents, delta, false); > + return xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, delta, false); > } > > static inline void xfs_add_frextents(struct xfs_mount *mp, uint64_t delta) > { > - xfs_add_freecounter(mp, &mp->m_frextents, delta); > + xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, delta); > } > > extern int xfs_readsb(xfs_mount_t *, int); > diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c > index bc18b694db75..8da2498417f5 100644 > --- a/fs/xfs/xfs_rtalloc.c > +++ b/fs/xfs/xfs_rtalloc.c > @@ -1519,7 +1519,7 @@ xfs_rtalloc_reinit_frextents( > spin_lock(&mp->m_sb_lock); > mp->m_sb.sb_frextents = val; > spin_unlock(&mp->m_sb_lock); > - percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents); > + xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, mp->m_sb.sb_frextents); > return 0; > } > > diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c > index a74a0cc1f6f6..1960ee0aad45 100644 > --- a/fs/xfs/xfs_super.c > +++ b/fs/xfs/xfs_super.c > @@ -834,10 +834,11 @@ xfs_statfs_data( > struct kstatfs *st) > { > int64_t fdblocks = > - percpu_counter_sum(&mp->m_fdblocks); > + xfs_sum_freecounter(mp, XC_FREE_BLOCKS); > > /* make sure st->f_bfree does not underflow */ > - st->f_bfree = max(0LL, fdblocks - xfs_fdblocks_unavailable(mp)); > + st->f_bfree = max(0LL, > + fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS)); > st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp); > } > > @@ -852,9 +853,9 @@ xfs_statfs_rt( > struct kstatfs *st) > { > int64_t freertx = > - percpu_counter_sum_positive(&mp->m_frextents); > + xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS); Same question as I had for xfs_log_sb() about _positive. 
--D > > - st->f_bfree = xfs_rtbxlen_to_blen(mp, freertx); > + st->f_bfree = xfs_rtbxlen_to_blen(mp, max(0LL, freertx)); > st->f_blocks = mp->m_sb.sb_rblocks; > } > > @@ -920,24 +921,32 @@ xfs_fs_statfs( > } > > STATIC void > -xfs_save_resvblks(struct xfs_mount *mp) > +xfs_save_resvblks( > + struct xfs_mount *mp) > { > - mp->m_resblks_save = mp->m_resblks; > - xfs_reserve_blocks(mp, 0); > + enum xfs_free_counter i; > + > + for (i = 0; i < XC_FREE_NR; i++) { > + mp->m_resblks[i].save = mp->m_resblks[i].total; > + xfs_reserve_blocks(mp, i, 0); > + } > } > > STATIC void > -xfs_restore_resvblks(struct xfs_mount *mp) > +xfs_restore_resvblks( > + struct xfs_mount *mp) > { > - uint64_t resblks; > + uint64_t resblks; > + enum xfs_free_counter i; > > - if (mp->m_resblks_save) { > - resblks = mp->m_resblks_save; > - mp->m_resblks_save = 0; > - } else > - resblks = xfs_default_resblks(mp); > - > - xfs_reserve_blocks(mp, resblks); > + for (i = 0; i < XC_FREE_NR; i++) { > + if (mp->m_resblks[i].save) { > + resblks = mp->m_resblks[i].save; > + mp->m_resblks[i].save = 0; > + } else > + resblks = xfs_default_resblks(mp); > + xfs_reserve_blocks(mp, i, resblks); > + } > } > > /* > @@ -1063,7 +1072,8 @@ static int > xfs_init_percpu_counters( > struct xfs_mount *mp) > { > - int error; > + int error; > + enum xfs_free_counter i; > > error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); > if (error) > @@ -1073,30 +1083,28 @@ xfs_init_percpu_counters( > if (error) > goto free_icount; > > - error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL); > - if (error) > - goto free_ifree; > - > error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL); > if (error) > - goto free_fdblocks; > + goto free_ifree; > > error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL); > if (error) > goto free_delalloc; > > - error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL); > - if (error) > - goto free_delalloc_rt; > + for (i = 0; i < XC_FREE_NR; i++) { > + error = percpu_counter_init(&mp->m_free[i], 0, GFP_KERNEL); > + if (error) > + goto free_freecounters; > + } > > return 0; > > -free_delalloc_rt: > +free_freecounters: > + while (--i > 0) > + percpu_counter_destroy(&mp->m_free[i]); > percpu_counter_destroy(&mp->m_delalloc_rtextents); > free_delalloc: > percpu_counter_destroy(&mp->m_delalloc_blks); > -free_fdblocks: > - percpu_counter_destroy(&mp->m_fdblocks); > free_ifree: > percpu_counter_destroy(&mp->m_ifree); > free_icount: > @@ -1110,24 +1118,26 @@ xfs_reinit_percpu_counters( > { > percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); > percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree); > - percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks); > - percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents); > + xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks); > + xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, mp->m_sb.sb_frextents); > } > > static void > xfs_destroy_percpu_counters( > struct xfs_mount *mp) > { > + enum xfs_free_counter i; > + > + for (i = 0; i < XC_FREE_NR; i++) > + percpu_counter_destroy(&mp->m_free[i]); > percpu_counter_destroy(&mp->m_icount); > percpu_counter_destroy(&mp->m_ifree); > - percpu_counter_destroy(&mp->m_fdblocks); > ASSERT(xfs_is_shutdown(mp) || > percpu_counter_sum(&mp->m_delalloc_rtextents) == 0); > percpu_counter_destroy(&mp->m_delalloc_rtextents); > ASSERT(xfs_is_shutdown(mp) || > percpu_counter_sum(&mp->m_delalloc_blks) == 0); > percpu_counter_destroy(&mp->m_delalloc_blks); > - percpu_counter_destroy(&mp->m_frextents); > } > > static 
int > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h > index 4fe689410eb6..15dec76fec10 100644 > --- a/fs/xfs/xfs_trace.h > +++ b/fs/xfs/xfs_trace.h > @@ -5621,7 +5621,7 @@ DECLARE_EVENT_CLASS(xfs_metafile_resv_class, > > __entry->dev = mp->m_super->s_dev; > __entry->ino = ip->i_ino; > - __entry->freeblks = percpu_counter_sum(&mp->m_fdblocks); > + __entry->freeblks = xfs_sum_freecounter(mp, XC_FREE_BLOCKS); > __entry->reserved = ip->i_delayed_blks; > __entry->asked = ip->i_meta_resv_asked; > __entry->used = ip->i_nblocks; > -- > 2.45.2 > >