Re: [PATCH 07/42] xfs: active perag reference counting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2023-01-19 at 09:44 +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> We need to be able to dynamically remove instantiated AGs from
> memory safely, either for shrinking the filesystem or paging AG
> state in and out of memory (e.g. supporting millions of AGs). This
> means we need to be able to safely exclude operations from accessing
> perags while dynamic removal is in progress.
> 
> To do this, introduce the concept of active and passive references.
> Active references are required for high level operations that make
> use of an AG for a given operation (e.g. allocation) and pin the
> perag in memory for the duration of the operation that is operating
> on the perag (e.g. transaction scope). This means we can fail to get
> an active reference to an AG, hence callers of the new active
> reference API must be able to handle lookup failure gracefully.
> 
> Passive references are used in low level code, where we might need
> to access the perag structure for the purposes of completing high
> level operations. For example, buffers need to use passive
> references because:
> - we need to be able to do metadata IO during operations like grow
>   and shrink transactions where high level active references to the
>   AG have already been blocked
> - buffers need to pin the perag until they are reclaimed from
>   memory, something that high level code has no direct control over.
> - unused cached buffers should not prevent a shrink from being
>   started.
> 
> Hence we have active references that will form exclusion barriers
> for operations to be performed on an AG, and passive references that
> will prevent reclaim of the perag until all objects with passive
> references have been reclaimed themselves.
> 
> This patch introduce xfs_perag_grab()/xfs_perag_rele() as the API
> for active AG reference functionality. We also need to convert the
> for_each_perag*() iterators to use active references, which will
> start the process of converting high level code over to using active
> references. Conversion of non-iterator based code to active
> references will be done in followup patches.
> 
> Note that the implementation using reference counting is really just
> a development vehicle for the API to ensure we don't have any leaks
> in the callers. Once we need to remove perag structures from memory
> dyanmically, we will need a much more robust per-ag state transition
> mechanism for preventing new references from being taken while we
> wait for existing references to drain before removal from memory can
> occur....
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
ok, I was able to follow it
Reviewed-by: Allison Henderson <allison.henderson@xxxxxxxxxx>

>  fs/xfs/libxfs/xfs_ag.c    | 70
> +++++++++++++++++++++++++++++++++++++++
>  fs/xfs/libxfs/xfs_ag.h    | 31 ++++++++++++-----
>  fs/xfs/scrub/bmap.c       |  2 +-
>  fs/xfs/scrub/fscounters.c |  4 +--
>  fs/xfs/xfs_fsmap.c        |  4 +--
>  fs/xfs/xfs_icache.c       |  2 +-
>  fs/xfs/xfs_iwalk.c        |  6 ++--
>  fs/xfs/xfs_reflink.c      |  2 +-
>  fs/xfs/xfs_trace.h        |  3 ++
>  9 files changed, 105 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
> index bb0c700afe3c..46e25c682bf4 100644
> --- a/fs/xfs/libxfs/xfs_ag.c
> +++ b/fs/xfs/libxfs/xfs_ag.c
> @@ -94,6 +94,68 @@ xfs_perag_put(
>         trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref,
> _RET_IP_);
>  }
>  
> +/*
> + * Active references for perag structures. This is for short term
> access to the
> + * per ag structures for walking trees or accessing state. If an AG
> is being
> + * shrunk or is offline, then this will fail to find that AG and
> return NULL
> + * instead.
> + */
> +struct xfs_perag *
> +xfs_perag_grab(
> +       struct xfs_mount        *mp,
> +       xfs_agnumber_t          agno)
> +{
> +       struct xfs_perag        *pag;
> +
> +       rcu_read_lock();
> +       pag = radix_tree_lookup(&mp->m_perag_tree, agno);
> +       if (pag) {
> +               trace_xfs_perag_grab(mp, pag->pag_agno,
> +                               atomic_read(&pag->pag_active_ref),
> _RET_IP_);
> +               if (!atomic_inc_not_zero(&pag->pag_active_ref))
> +                       pag = NULL;
> +       }
> +       rcu_read_unlock();
> +       return pag;
> +}
> +
> +/*
> + * search from @first to find the next perag with the given tag set.
> + */
> +struct xfs_perag *
> +xfs_perag_grab_tag(
> +       struct xfs_mount        *mp,
> +       xfs_agnumber_t          first,
> +       int                     tag)
> +{
> +       struct xfs_perag        *pag;
> +       int                     found;
> +
> +       rcu_read_lock();
> +       found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
> +                                       (void **)&pag, first, 1,
> tag);
> +       if (found <= 0) {
> +               rcu_read_unlock();
> +               return NULL;
> +       }
> +       trace_xfs_perag_grab_tag(mp, pag->pag_agno,
> +                       atomic_read(&pag->pag_active_ref), _RET_IP_);
> +       if (!atomic_inc_not_zero(&pag->pag_active_ref))
> +               pag = NULL;
> +       rcu_read_unlock();
> +       return pag;
> +}
> +
> +void
> +xfs_perag_rele(
> +       struct xfs_perag        *pag)
> +{
> +       trace_xfs_perag_rele(pag->pag_mount, pag->pag_agno,
> +                       atomic_read(&pag->pag_active_ref), _RET_IP_);
> +       if (atomic_dec_and_test(&pag->pag_active_ref))
> +               wake_up(&pag->pag_active_wq);
> +}
> +
>  /*
>   * xfs_initialize_perag_data
>   *
> @@ -196,6 +258,10 @@ xfs_free_perag(
>                 cancel_delayed_work_sync(&pag->pag_blockgc_work);
>                 xfs_buf_hash_destroy(pag);
>  
> +               /* drop the mount's active reference */
> +               xfs_perag_rele(pag);
> +               XFS_IS_CORRUPT(pag->pag_mount,
> +                               atomic_read(&pag->pag_active_ref) !=
> 0);
>                 call_rcu(&pag->rcu_head, __xfs_free_perag);
>         }
>  }
> @@ -314,6 +380,7 @@ xfs_initialize_perag(
>                 INIT_DELAYED_WORK(&pag->pag_blockgc_work,
> xfs_blockgc_worker);
>                 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
>                 init_waitqueue_head(&pag->pagb_wait);
> +               init_waitqueue_head(&pag->pag_active_wq);
>                 pag->pagb_count = 0;
>                 pag->pagb_tree = RB_ROOT;
>  #endif /* __KERNEL__ */
> @@ -322,6 +389,9 @@ xfs_initialize_perag(
>                 if (error)
>                         goto out_remove_pag;
>  
> +               /* Active ref owned by mount indicates AG is online.
> */
> +               atomic_set(&pag->pag_active_ref, 1);
> +
>                 /* first new pag is fully initialized */
>                 if (first_initialised == NULLAGNUMBER)
>                         first_initialised = index;
> diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
> index 191b22b9a35b..aeb21c8df201 100644
> --- a/fs/xfs/libxfs/xfs_ag.h
> +++ b/fs/xfs/libxfs/xfs_ag.h
> @@ -32,7 +32,9 @@ struct xfs_ag_resv {
>  struct xfs_perag {
>         struct xfs_mount *pag_mount;    /* owner filesystem */
>         xfs_agnumber_t  pag_agno;       /* AG this structure belongs
> to */
> -       atomic_t        pag_ref;        /* perag reference count */
> +       atomic_t        pag_ref;        /* passive reference count */
> +       atomic_t        pag_active_ref; /* active reference count */
> +       wait_queue_head_t pag_active_wq;/* woken active_ref falls to
> zero */
>         char            pagf_init;      /* this agf's entry is
> initialized */
>         char            pagi_init;      /* this agi's entry is
> initialized */
>         char            pagf_metadata;  /* the agf is preferred to be
> metadata */
> @@ -111,11 +113,18 @@ int xfs_initialize_perag(struct xfs_mount *mp,
> xfs_agnumber_t agcount,
>  int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t
> agno);
>  void xfs_free_perag(struct xfs_mount *mp);
>  
> +/* Passive AG references */
>  struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t
> agno);
>  struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp,
> xfs_agnumber_t agno,
>                 unsigned int tag);
>  void xfs_perag_put(struct xfs_perag *pag);
>  
> +/* Active AG references */
> +struct xfs_perag *xfs_perag_grab(struct xfs_mount *,
> xfs_agnumber_t);
> +struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *,
> xfs_agnumber_t,
> +                                  int tag);
> +void xfs_perag_rele(struct xfs_perag *pag);
> +
>  /*
>   * Per-ag geometry infomation and validation
>   */
> @@ -193,14 +202,18 @@ xfs_perag_next(
>         struct xfs_mount        *mp = pag->pag_mount;
>  
>         *agno = pag->pag_agno + 1;
> -       xfs_perag_put(pag);
> -       if (*agno > end_agno)
> -               return NULL;
> -       return xfs_perag_get(mp, *agno);
> +       xfs_perag_rele(pag);
> +       while (*agno <= end_agno) {
> +               pag = xfs_perag_grab(mp, *agno);
> +               if (pag)
> +                       return pag;
> +               (*agno)++;
> +       }
> +       return NULL;
>  }
>  
>  #define for_each_perag_range(mp, agno, end_agno, pag) \
> -       for ((pag) = xfs_perag_get((mp), (agno)); \
> +       for ((pag) = xfs_perag_grab((mp), (agno)); \
>                 (pag) != NULL; \
>                 (pag) = xfs_perag_next((pag), &(agno), (end_agno)))
>  
> @@ -213,11 +226,11 @@ xfs_perag_next(
>         for_each_perag_from((mp), (agno), (pag))
>  
>  #define for_each_perag_tag(mp, agno, pag, tag) \
> -       for ((agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \
> +       for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag));
> \
>                 (pag) != NULL; \
>                 (agno) = (pag)->pag_agno + 1, \
> -               xfs_perag_put(pag), \
> -               (pag) = xfs_perag_get_tag((mp), (agno), (tag)))
> +               xfs_perag_rele(pag), \
> +               (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
>  
>  struct aghdr_init_data {
>         /* per ag data */
> diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
> index d50d0eab196a..dbbc7037074c 100644
> --- a/fs/xfs/scrub/bmap.c
> +++ b/fs/xfs/scrub/bmap.c
> @@ -662,7 +662,7 @@ xchk_bmap_check_rmaps(
>                 error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
>                 if (error ||
>                     (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
> -                       xfs_perag_put(pag);
> +                       xfs_perag_rele(pag);
>                         return error;
>                 }
>         }
> diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
> index 4777e7b89fdc..ef97670970c3 100644
> --- a/fs/xfs/scrub/fscounters.c
> +++ b/fs/xfs/scrub/fscounters.c
> @@ -117,7 +117,7 @@ xchk_fscount_warmup(
>         if (agi_bp)
>                 xfs_buf_relse(agi_bp);
>         if (pag)
> -               xfs_perag_put(pag);
> +               xfs_perag_rele(pag);
>         return error;
>  }
>  
> @@ -249,7 +249,7 @@ xchk_fscount_aggregate_agcounts(
>  
>         }
>         if (pag)
> -               xfs_perag_put(pag);
> +               xfs_perag_rele(pag);
>         if (error) {
>                 xchk_set_incomplete(sc);
>                 return error;
> diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
> index 88a88506ffff..120d284a03fe 100644
> --- a/fs/xfs/xfs_fsmap.c
> +++ b/fs/xfs/xfs_fsmap.c
> @@ -688,11 +688,11 @@ __xfs_getfsmap_datadev(
>                 info->agf_bp = NULL;
>         }
>         if (info->pag) {
> -               xfs_perag_put(info->pag);
> +               xfs_perag_rele(info->pag);
>                 info->pag = NULL;
>         } else if (pag) {
>                 /* loop termination case */
> -               xfs_perag_put(pag);
> +               xfs_perag_rele(pag);
>         }
>  
>         return error;
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index ddeaccc04aec..0f4a014dded3 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -1767,7 +1767,7 @@ xfs_icwalk(
>                 if (error) {
>                         last_error = error;
>                         if (error == -EFSCORRUPTED) {
> -                               xfs_perag_put(pag);
> +                               xfs_perag_rele(pag);
>                                 break;
>                         }
>                 }
> diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
> index 7558486f4937..c31857d903a4 100644
> --- a/fs/xfs/xfs_iwalk.c
> +++ b/fs/xfs/xfs_iwalk.c
> @@ -591,7 +591,7 @@ xfs_iwalk(
>         }
>  
>         if (iwag.pag)
> -               xfs_perag_put(pag);
> +               xfs_perag_rele(pag);
>         xfs_iwalk_free(&iwag);
>         return error;
>  }
> @@ -683,7 +683,7 @@ xfs_iwalk_threaded(
>                         break;
>         }
>         if (pag)
> -               xfs_perag_put(pag);
> +               xfs_perag_rele(pag);
>         if (polled)
>                 xfs_pwork_poll(&pctl);
>         return xfs_pwork_destroy(&pctl);
> @@ -776,7 +776,7 @@ xfs_inobt_walk(
>         }
>  
>         if (iwag.pag)
> -               xfs_perag_put(pag);
> +               xfs_perag_rele(pag);
>         xfs_iwalk_free(&iwag);
>         return error;
>  }
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 57bf59ff4854..f5dc46ce9803 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -927,7 +927,7 @@ xfs_reflink_recover_cow(
>         for_each_perag(mp, agno, pag) {
>                 error = xfs_refcount_recover_cow_leftovers(mp, pag);
>                 if (error) {
> -                       xfs_perag_put(pag);
> +                       xfs_perag_rele(pag);
>                         break;
>                 }
>         }
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 7dc57db6aa42..f0b62054ea68 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -189,6 +189,9 @@ DEFINE_EVENT(xfs_perag_class, name, \
>  DEFINE_PERAG_REF_EVENT(xfs_perag_get);
>  DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
>  DEFINE_PERAG_REF_EVENT(xfs_perag_put);
> +DEFINE_PERAG_REF_EVENT(xfs_perag_grab);
> +DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag);
> +DEFINE_PERAG_REF_EVENT(xfs_perag_rele);
>  DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
>  DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
>  





[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux