On Thu, Oct 01, 2020 at 11:03:09AM -0400, Brian Foster wrote: > The updated quotaoff logging algorithm depends on a runtime quiesce > of the transaction subsystem to guarantee all transactions after a > certain point detect quota subsystem changes. Implement this > mechanism using an internal lock, similar to the external filesystem > freeze mechanism. This is also somewhat analogous to the old percpu > transaction counter mechanism, but we don't actually need a counter. > > Signed-off-by: Brian Foster <bfoster@xxxxxxxxxx> > --- > fs/xfs/xfs_aops.c | 2 ++ > fs/xfs/xfs_mount.h | 3 +++ > fs/xfs/xfs_super.c | 8 ++++++++ > fs/xfs/xfs_trans.c | 4 ++-- > fs/xfs/xfs_trans.h | 20 ++++++++++++++++++++ > 5 files changed, 35 insertions(+), 2 deletions(-) > > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c > index b35611882ff9..214310c94de5 100644 > --- a/fs/xfs/xfs_aops.c > +++ b/fs/xfs/xfs_aops.c > @@ -58,6 +58,7 @@ xfs_setfilesize_trans_alloc( > * we released it. > */ > __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS); > + percpu_rwsem_release(&mp->m_trans_rwsem, true, _THIS_IP_); > /* > * We hand off the transaction to the completion thread now, so > * clear the flag here. 
> @@ -127,6 +128,7 @@ xfs_setfilesize_ioend( > */ > current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); > __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); > + percpu_rwsem_acquire(&ip->i_mount->m_trans_rwsem, true, _THIS_IP_); > > /* we abort the update if there was an IO error */ > if (error) { > diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h > index dfa429b77ee2..f1083a9ce1f8 100644 > --- a/fs/xfs/xfs_mount.h > +++ b/fs/xfs/xfs_mount.h > @@ -171,6 +171,9 @@ typedef struct xfs_mount { > */ > struct percpu_counter m_delalloc_blks; > > + /* lock for transaction quiesce (used by quotaoff) */ > + struct percpu_rw_semaphore m_trans_rwsem; > + > struct radix_tree_root m_perag_tree; /* per-ag accounting info */ > spinlock_t m_perag_lock; /* lock for m_perag_tree */ > uint64_t m_resblks; /* total reserved blocks */ > diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c > index baf5de30eebb..ff3ad5392e21 100644 > --- a/fs/xfs/xfs_super.c > +++ b/fs/xfs/xfs_super.c > @@ -1029,8 +1029,15 @@ xfs_init_percpu_counters( > if (error) > goto free_fdblocks; > > + /* not a counter, but close enough... 
*/ > + error = percpu_init_rwsem(&mp->m_trans_rwsem); > + if (error) > + goto free_delalloc; > + > return 0; > > +free_delalloc: > + percpu_counter_destroy(&mp->m_delalloc_blks); > free_fdblocks: > percpu_counter_destroy(&mp->m_fdblocks); > free_ifree: > @@ -1053,6 +1060,7 @@ static void > xfs_destroy_percpu_counters( > struct xfs_mount *mp) > { > + percpu_free_rwsem(&mp->m_trans_rwsem); > percpu_counter_destroy(&mp->m_icount); > percpu_counter_destroy(&mp->m_ifree); > percpu_counter_destroy(&mp->m_fdblocks); > diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c > index ca18a040336a..c07fa036549a 100644 > --- a/fs/xfs/xfs_trans.c > +++ b/fs/xfs/xfs_trans.c > @@ -69,7 +69,7 @@ xfs_trans_free( > > trace_xfs_trans_free(tp, _RET_IP_); > if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) > - sb_end_intwrite(tp->t_mountp->m_super); > + xfs_trans_end(tp->t_mountp); > xfs_trans_free_dqinfo(tp); > kmem_cache_free(xfs_trans_zone, tp); > } > @@ -265,7 +265,7 @@ xfs_trans_alloc( > */ > tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); > if (!(flags & XFS_TRANS_NO_WRITECOUNT)) > - sb_start_intwrite(mp->m_super); > + xfs_trans_start(mp); > > /* > * Zero-reservation ("empty") transactions can't modify anything, so > diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h > index f46534b75236..af54c17a22c0 100644 > --- a/fs/xfs/xfs_trans.h > +++ b/fs/xfs/xfs_trans.h > @@ -209,6 +209,26 @@ xfs_trans_read_buf( > flags, bpp, ops); > } > > +/* > + * Context tracking helpers for external (i.e. fs freeze) and internal > + * transaction quiesce. > + */ > +static inline void > +xfs_trans_start( > + struct xfs_mount *mp) > +{ > + sb_start_intwrite(mp->m_super); > + percpu_down_read(&mp->m_trans_rwsem);

/me wonders, have you noticed any extra CPU overhead with this? So far it looks ok to me, though I wonder if we could skip all this if CONFIG_XFS_QUOTA=n...

--D

> +} > + > +static inline void > +xfs_trans_end( > + struct xfs_mount *mp) > +{ > + percpu_up_read(&mp->m_trans_rwsem); > + sb_end_intwrite(mp->m_super); > +} > + > struct xfs_buf *xfs_trans_getsb(struct xfs_trans *); > > void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); > -- > 2.25.4 >