Re: [PATCH v3 1/2] ceph: convert mdsc->cap_dirty to a per-session list

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Apr 7, 2020 at 3:42 AM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
>
> This is a per-sb list now, but that makes it difficult to tell when
> the cap is the last dirty one associated with the session. Switch
> this to be a per-session list, but continue using the
> mdsc->cap_dirty_lock to protect the lists.
>
> This list is only ever walked in ceph_flush_dirty_caps, so change that
> to walk the sessions array and then flush the caps for inodes on each
> session's list.
>
> If the auth cap ever changes while the inode has dirty caps, then
> move the inode to the appropriate session for the new auth_cap. Also,
> ensure that we never remove an auth cap while the inode is still on the
> s_cap_dirty list.
>
> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> ---
>  fs/ceph/caps.c       | 78 ++++++++++++++++++++++++++++++++++++--------
>  fs/ceph/mds_client.c |  2 +-
>  fs/ceph/mds_client.h |  5 +--
>  fs/ceph/super.h      | 19 +++++++++--
>  4 files changed, 85 insertions(+), 19 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 61808793e0c0..b27bca0e9651 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -597,6 +597,27 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
>         }
>  }
>
> +/**
> + * change_auth_cap_ses - move inode to appropriate lists when auth caps change
> + * @ci: inode to be moved
> + * @session: new auth caps session
> + */
> +static void change_auth_cap_ses(struct ceph_inode_info *ci,
> +                               struct ceph_mds_session *session)
> +{
> +       lockdep_assert_held(&ci->i_ceph_lock);
> +
> +       if (list_empty(&ci->i_dirty_item) && list_empty(&ci->i_flushing_item))
> +               return;
> +
> +       spin_lock(&session->s_mdsc->cap_dirty_lock);
> +       if (!list_empty(&ci->i_dirty_item))
> +               list_move(&ci->i_dirty_item, &session->s_cap_dirty);
> +       if (!list_empty(&ci->i_flushing_item))
> +               list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
> +       spin_unlock(&session->s_mdsc->cap_dirty_lock);
> +}
> +
>  /*
>   * Add a capability under the given MDS session.
>   *
> @@ -727,6 +748,9 @@ void ceph_add_cap(struct inode *inode,
>         if (flags & CEPH_CAP_FLAG_AUTH) {
>                 if (!ci->i_auth_cap ||
>                     ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
> +                       if (ci->i_auth_cap &&
> +                           ci->i_auth_cap->session != cap->session)
> +                               change_auth_cap_ses(ci, cap->session);
>                         ci->i_auth_cap = cap;
>                         cap->mds_wanted = wanted;
>                 }
> @@ -1123,8 +1147,10 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>
>         /* remove from inode's cap rbtree, and clear auth cap */
>         rb_erase(&cap->ci_node, &ci->i_caps);
> -       if (ci->i_auth_cap == cap)
> +       if (ci->i_auth_cap == cap) {
> +               WARN_ON_ONCE(!list_empty(&ci->i_dirty_item));
>                 ci->i_auth_cap = NULL;
> +       }
>
>         /* remove from session list */
>         spin_lock(&session->s_cap_lock);
> @@ -1690,6 +1716,8 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
>              ceph_cap_string(was | mask));
>         ci->i_dirty_caps |= mask;
>         if (was == 0) {
> +               struct ceph_mds_session *session = ci->i_auth_cap->session;
> +
>                 WARN_ON_ONCE(ci->i_prealloc_cap_flush);
>                 swap(ci->i_prealloc_cap_flush, *pcf);
>
> @@ -1702,7 +1730,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
>                      &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
>                 BUG_ON(!list_empty(&ci->i_dirty_item));
>                 spin_lock(&mdsc->cap_dirty_lock);
> -               list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
> +               list_add(&ci->i_dirty_item, &session->s_cap_dirty);
>                 spin_unlock(&mdsc->cap_dirty_lock);
>                 if (ci->i_flushing_caps == 0) {
>                         ihold(inode);
> @@ -3750,15 +3778,9 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>                         tcap->issue_seq = t_seq - 1;
>                         tcap->issued |= issued;
>                         tcap->implemented |= issued;
> -                       if (cap == ci->i_auth_cap)
> +                       if (cap == ci->i_auth_cap) {
>                                 ci->i_auth_cap = tcap;
> -
> -                       if (!list_empty(&ci->i_cap_flush_list) &&
> -                           ci->i_auth_cap == tcap) {
> -                               spin_lock(&mdsc->cap_dirty_lock);
> -                               list_move_tail(&ci->i_flushing_item,
> -                                              &tcap->session->s_cap_flushing);
> -                               spin_unlock(&mdsc->cap_dirty_lock);
> +                               change_auth_cap_ses(ci, tcap->session);
>                         }
>                 }
>                 __ceph_remove_cap(cap, false);
> @@ -4176,15 +4198,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
>  /*
>   * Flush all dirty caps to the mds
>   */
> -void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
> +static void flush_dirty_session_caps(struct ceph_mds_session *s)
>  {
> +       struct ceph_mds_client *mdsc = s->s_mdsc;
>         struct ceph_inode_info *ci;
>         struct inode *inode;
>
>         dout("flush_dirty_caps\n");
>         spin_lock(&mdsc->cap_dirty_lock);
> -       while (!list_empty(&mdsc->cap_dirty)) {
> -               ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
> +       while (!list_empty(&s->s_cap_dirty)) {
> +               ci = list_first_entry(&s->s_cap_dirty, struct ceph_inode_info,
>                                       i_dirty_item);
>                 inode = &ci->vfs_inode;
>                 ihold(inode);
> @@ -4198,6 +4221,35 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
>         dout("flush_dirty_caps done\n");
>  }
>
> +static void iterate_sessions(struct ceph_mds_client *mdsc,
> +                            void (*cb)(struct ceph_mds_session *))
> +{
> +       int mds;
> +
> +       mutex_lock(&mdsc->mutex);
> +       for (mds = 0; mds < mdsc->max_sessions; ++mds) {
> +               struct ceph_mds_session *s;
> +
> +               if (!mdsc->sessions[mds])
> +                       continue;
> +
> +               s = ceph_get_mds_session(mdsc->sessions[mds]);
> +               if (!s)
> +                       continue;
> +
> +               mutex_unlock(&mdsc->mutex);
> +               cb(s);
> +               ceph_put_mds_session(s);
> +               mutex_lock(&mdsc->mutex);
> +       };
> +       mutex_unlock(&mdsc->mutex);
> +}
> +
> +void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
> +{
> +       iterate_sessions(mdsc, flush_dirty_session_caps);
> +}
> +
>  void __ceph_touch_fmode(struct ceph_inode_info *ci,
>                         struct ceph_mds_client *mdsc, int fmode)
>  {
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 9a8e7013aca1..be4ad7d28e3a 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -755,6 +755,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>         INIT_LIST_HEAD(&s->s_cap_releases);
>         INIT_WORK(&s->s_cap_release_work, ceph_cap_release_work);
>
> +       INIT_LIST_HEAD(&s->s_cap_dirty);
>         INIT_LIST_HEAD(&s->s_cap_flushing);
>
>         mdsc->sessions[mds] = s;
> @@ -4375,7 +4376,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
>         spin_lock_init(&mdsc->snap_flush_lock);
>         mdsc->last_cap_flush_tid = 1;
>         INIT_LIST_HEAD(&mdsc->cap_flush_list);
> -       INIT_LIST_HEAD(&mdsc->cap_dirty);
>         INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
>         mdsc->num_cap_flushing = 0;
>         spin_lock_init(&mdsc->cap_dirty_lock);
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 8065863321fc..bd20257fb4c2 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -199,8 +199,10 @@ struct ceph_mds_session {
>         struct list_head  s_cap_releases; /* waiting cap_release messages */
>         struct work_struct s_cap_release_work;
>
> -       /* protected by mutex */
> +       /* both protected by s_mdsc->cap_dirty_lock */
> +       struct list_head  s_cap_dirty;        /* inodes w/ dirty caps */
>         struct list_head  s_cap_flushing;     /* inodes w/ flushing caps */
> +
>         unsigned long     s_renew_requested; /* last time we sent a renew req */
>         u64               s_renew_seq;
>
> @@ -424,7 +426,6 @@ struct ceph_mds_client {
>
>         u64               last_cap_flush_tid;
>         struct list_head  cap_flush_list;
> -       struct list_head  cap_dirty;        /* inodes with dirty caps */
>         struct list_head  cap_dirty_migrating; /* ...that are migration... */
>         int               num_cap_flushing; /* # caps we are flushing */
>         spinlock_t        cap_dirty_lock;   /* protects above items */
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index bb372859c0ad..3a95395a4217 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -351,9 +351,22 @@ struct ceph_inode_info {
>         struct rb_root i_caps;           /* cap list */
>         struct ceph_cap *i_auth_cap;     /* authoritative cap, if any */
>         unsigned i_dirty_caps, i_flushing_caps;     /* mask of dirtied fields */
> -       struct list_head i_dirty_item, i_flushing_item; /* protected by
> -                                                        * mdsc->cap_dirty_lock
> -                                                        */
> +
> +       /*
> +        * Link to the auth cap's session's s_cap_dirty list. s_cap_dirty
> +        * is protected by the mdsc->cap_dirty_lock, but each individual item
> +        * is also protected by the inode's i_ceph_lock. Walking s_cap_dirty
> +        * requires the mdsc->cap_dirty_lock. List presence for an item can
> +        * be tested under the i_ceph_lock. Changing anything requires both.
> +        */
> +       struct list_head i_dirty_item;
> +
> +       /*
> +        * Link to session's s_cap_flushing list. Protected by
> +        * mdsc->cap_dirty_lock.
> +        */
> +       struct list_head i_flushing_item;
> +
>         /* we need to track cap writeback on a per-cap-bit basis, to allow
>          * overlapping, pipelined cap flushes to the mds.  we can probably
>          * reduce the tid to 8 bits if we're concerned about inode size. */
> --
> 2.25.1
>

Reviewed-by: "Yan, Zheng" <zyan@xxxxxxxxxx>



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux