Hi Darrick, On Tue, Apr 20, 2021 at 09:22:50AM -0700, Darrick J. Wong wrote: > On Tue, Apr 20, 2021 at 07:08:55PM +0800, Gao Xiang wrote: > > As Dave mentioned [1], "/me is now wondering why we even bother > > with !lazy-count anymore. > > > > We've updated the agr btree block accounting unconditionally since > > lazy-count was added, and scrub will always report a mismatch in > > counts if they exist regardless of lazy-count. So why don't we just > > start ignoring the on-disk value and always use lazy-count based > > updates? " > > > > Therefore, turn on lazy sb counters if it's still disabled at the > > mount time, or at remount_rw if fs was mounted as read-only. > > xfs_initialize_perag_data() is reused here since no need to scan > > agf/agi once more again. > > > > After this patch, we could get rid of this whole set of subtle > > conditional behaviours in the codebase. > > > > [1] https://lore.kernel.org/r/20210417223201.GU63242@xxxxxxxxxxxxxxxxxxx > > Signed-off-by: Gao Xiang <hsiangkao@xxxxxxxxxx> > > --- > > Enabling lazysbcount is only addressed in this patch, I'll send > > out a seperated following patch to cleanup all unused conditions > > later. > > > > Also tr_sb is reused here since only agf is modified for each ag, > > and before lazysbcount sb feature is enabled (m_update_sb = true), > > agf_btreeblks field shouldn't matter for such AGs. > > > > fs/xfs/libxfs/xfs_format.h | 6 +++ > > fs/xfs/libxfs/xfs_sb.c | 93 +++++++++++++++++++++++++++++++++++--- > > fs/xfs/xfs_mount.c | 2 +- > > fs/xfs/xfs_super.c | 5 ++ > > 4 files changed, 98 insertions(+), 8 deletions(-) > > > > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h > > index 76e2461b9e66..9081d7876d66 100644 > > --- a/fs/xfs/libxfs/xfs_format.h > > +++ b/fs/xfs/libxfs/xfs_format.h > > @@ -385,6 +385,12 @@ static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp) > > (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); > > } > > > > +static inline void xfs_sb_version_addlazysbcount(struct xfs_sb *sbp) > > +{ > > + sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; > > + sbp->sb_features2 |= XFS_SB_VERSION2_LAZYSBCOUNTBIT; > > +} > > + > > static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp) > > { > > return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || > > diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c > > index 423dada3f64c..6353e0d4cab1 100644 > > --- a/fs/xfs/libxfs/xfs_sb.c > > +++ b/fs/xfs/libxfs/xfs_sb.c > > @@ -18,6 +18,7 @@ > > #include "xfs_trace.h" > > #include "xfs_trans.h" > > #include "xfs_buf_item.h" > > +#include "xfs_btree.h" > > #include "xfs_bmap_btree.h" > > #include "xfs_alloc_btree.h" > > #include "xfs_log.h" > > @@ -841,6 +842,55 @@ xfs_sb_mount_common( > > mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); > > } > > > > +static int > > +xfs_fixup_agf_btreeblks( > > + struct xfs_mount *mp, > > + struct xfs_trans *tp, > > + struct xfs_buf *agfbp, > > + xfs_agnumber_t agno) > > +{ > > + struct xfs_btree_cur *cur; > > + struct xfs_perag *pag = agfbp->b_pag; > > + struct xfs_agf *agf = agfbp->b_addr; > > + xfs_agblock_t btreeblks, blocks; > > + int error; > > + > > + cur = xfs_allocbt_init_cursor(mp, tp, agfbp, agno, XFS_BTNUM_BNO); > > + error = xfs_btree_count_blocks(cur, &blocks); > > + if (error) > > + goto err; > > + xfs_btree_del_cursor(cur, error); > > + btreeblks = blocks - 1; > > + > > + cur = xfs_allocbt_init_cursor(mp, tp, agfbp, agno, XFS_BTNUM_CNT); > > + error = xfs_btree_count_blocks(cur, &blocks); > > + if (error) > > + goto err; > > + xfs_btree_del_cursor(cur, error); > > + btreeblks += blocks - 1; > > + > > + /* > > + * although rmapbt doesn't exist in v4 fses, but it'd be better > > + * to turn it as a generic helper. > > + */ > > + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { > > + cur = xfs_rmapbt_init_cursor(mp, tp, agfbp, agno); > > + error = xfs_btree_count_blocks(cur, &blocks); > > + if (error) > > + goto err; > > + xfs_btree_del_cursor(cur, error); > > + btreeblks += blocks - 1; > > + } > > + > > + agf->agf_btreeblks = cpu_to_be32(btreeblks); > > + pag->pagf_btreeblks = btreeblks; > > + xfs_alloc_log_agf(tp, agfbp, XFS_AGF_BTREEBLKS); > > + return 0; > > +err: > > + xfs_btree_del_cursor(cur, error); > > + return error; > > +} > > + > > /* > > * xfs_initialize_perag_data > > * > > @@ -864,27 +914,51 @@ xfs_initialize_perag_data( > > uint64_t btree = 0; > > uint64_t fdblocks; > > int error = 0; > > + bool conv = !(mp->m_flags & XFS_MOUNT_RDONLY) && > > + !xfs_sb_version_haslazysbcount(sbp); > > + > > + if (conv) > > + xfs_warn(mp, "enabling lazy-counters..."); > > > > for (index = 0; index < agcount; index++) { > > + struct xfs_trans *tp = NULL; > > + struct xfs_buf *agfbp; > > + > > + if (conv) { > > + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, > > + 0, 0, 0, &tp); > > + if (error) > > + return error; > > + } > > + > > /* > > - * read the agf, then the agi. This gets us > > + * read the agi, then the agf. This gets us > > * all the information we need and populates the > > * per-ag structures for us. > > */ > > - error = xfs_alloc_pagf_init(mp, NULL, index, 0); > > - if (error) > > + error = xfs_ialloc_pagi_init(mp, tp, index); > > + if (error) { > > +err_out: > > + if (tp) > > + xfs_trans_cancel(tp); > > return error; > > + } > > > > - error = xfs_ialloc_pagi_init(mp, NULL, index); > > + error = xfs_alloc_read_agf(mp, tp, index, 0, &agfbp); > > if (error) > > - return error; > > - pag = xfs_perag_get(mp, index); > > + goto err_out; > > + pag = agfbp->b_pag; > > ifree += pag->pagi_freecount; > > ialloc += pag->pagi_count; > > bfree += pag->pagf_freeblks; > > bfreelst += pag->pagf_flcount; > > + if (tp) { > > + error = xfs_fixup_agf_btreeblks(mp, tp, agfbp, index); > > Lazysbcount upgrades should be done from a separate function, not mixed > in with perag initialization. I've seen some previous discussion about multiple AG total scan time cost. Yeah, if another extra scan really accepts here, I could update instead. > Also, why is it necessary to walk all the space btrees to set agf_btreeblks? If my understanding is correct, I think because without lazysbcount, even pagf_btreeblks is updated unconditionally now, but that counter is unreliable for quite ancient kernels which don't have such update logic. Kindly correct me if I'm wrong here. > > > + xfs_trans_commit(tp); > > + } else { > > + xfs_buf_relse(agfbp); > > + } > > btree += pag->pagf_btreeblks; > > - xfs_perag_put(pag); > > } > > fdblocks = bfree + bfreelst + btree; > > > > @@ -900,6 +974,11 @@ xfs_initialize_perag_data( > > goto out; > > } > > > > + if (conv) { > > + xfs_sb_version_addlazysbcount(sbp); > > + mp->m_update_sb = true; > > + xfs_warn(mp, "lazy-counters has been enabled."); > > But we don't log the sb update? > > As far as the feature upgrade goes, is it necessary to bwrite the > primary super to disk (and then log the change)[1] to prevent a truly > ancient kernel that doesn't support lazysbcount from trying to recover > the log and ending up with an unsupported feature set? Not quite sure if it does harm to ancient kernels with such unsupported feature. may I ask for more details? :) but yeah, if any issues here, I should follow 1) bwrite sb block first; 2) log sb > > [1] https://lore.kernel.org/linux-xfs/161723934343.3149451.16679733325094950568.stgit@magnolia/ > > > + } > > /* Overwrite incore superblock counters with just-read data */ > > spin_lock(&mp->m_sb_lock); > > sbp->sb_ifree = ifree; > > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c > > index cb1e2c4702c3..b3b13acd45d6 100644 > > --- a/fs/xfs/xfs_mount.c > > +++ b/fs/xfs/xfs_mount.c > > @@ -626,7 +626,7 @@ xfs_check_summary_counts( > > * superblock to be correct and we don't need to do anything here. > > * Otherwise, recalculate the summary counters. > > */ > > - if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) || > > + if ((xfs_sb_version_haslazysbcount(&mp->m_sb) && > > Not clear why the logic here inverts? .. thus xfs_initialize_perag_data() below can be called then. Thanks, Gao Xiang > > --D > > > XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) && > > !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS)) > > return 0; > > diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c > > index a2dab05332ac..16197a890c15 100644 > > --- a/fs/xfs/xfs_super.c > > +++ b/fs/xfs/xfs_super.c > > @@ -1678,6 +1678,11 @@ xfs_remount_rw( > > } > > > > mp->m_flags &= ~XFS_MOUNT_RDONLY; > > + if (!xfs_sb_version_haslazysbcount(sbp)) { > > + error = xfs_initialize_perag_data(mp, sbp->sb_agcount); > > + if (error) > > + return error; > > + } > > > > /* > > * If this is the first remount to writeable state we might have some > > -- > > 2.27.0 > > >