Re: [PATCH v5 05/12] ceph: cap tracking for async directory operations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Feb 19, 2020 at 9:27 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
>
> Track and correctly handle directory caps for asynchronous operations.
> Add aliases for Frc caps that we now designate as Dcu caps (when dealing
> with directories).
>
> Unlike file caps, we don't reclaim these when the session goes away, and
> instead preemptively release them. In-flight async dirops are instead
> handled during the reconnect phase. The client needs to re-do a synchronous
> operation in order to re-get directory caps.
>
> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> ---
>  fs/ceph/caps.c               | 29 ++++++++++++++++++++---------
>  fs/ceph/mds_client.c         | 31 ++++++++++++++++++++++++++-----
>  fs/ceph/mds_client.h         |  6 +++++-
>  include/linux/ceph/ceph_fs.h |  6 ++++++
>  4 files changed, 57 insertions(+), 15 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 295837215a3a..d6c5ee33f30f 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -992,7 +992,11 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
>  int __ceph_caps_wanted(struct ceph_inode_info *ci)
>  {
>         int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci);
> -       if (!S_ISDIR(ci->vfs_inode.i_mode)) {
> +       if (S_ISDIR(ci->vfs_inode.i_mode)) {
> +               /* we want EXCL if holding caps of dir ops */
> +               if (w & CEPH_CAP_ANY_DIR_OPS)
> +                       w |= CEPH_CAP_FILE_EXCL;
> +       } else {
>                 /* we want EXCL if dirty data */
>                 if (w & CEPH_CAP_FILE_BUFFER)
>                         w |= CEPH_CAP_FILE_EXCL;
> @@ -1890,10 +1894,13 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
>                          * revoking the shared cap on every create/unlink
>                          * operation.
>                          */
> -                       if (IS_RDONLY(inode))
> +                       if (IS_RDONLY(inode)) {
>                                 want = CEPH_CAP_ANY_SHARED;
> -                       else
> -                               want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
> +                       } else {
> +                               want = CEPH_CAP_ANY_SHARED |
> +                                      CEPH_CAP_FILE_EXCL |
> +                                      CEPH_CAP_ANY_DIR_OPS;
> +                       }
>                         retain |= want;
>                 } else {
>
> @@ -2750,13 +2757,17 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
>         int ret;
>
>         BUG_ON(need & ~CEPH_CAP_FILE_RD);
> -       BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
> -       ret = ceph_pool_perm_check(inode, need);
> -       if (ret < 0)
> -               return ret;
> +       if (need) {
> +               ret = ceph_pool_perm_check(inode, need);
> +               if (ret < 0)
> +                       return ret;
> +       }
>
> +       BUG_ON(want & ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO |
> +                       CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
> +                       CEPH_CAP_ANY_DIR_OPS));
>         ret = try_get_cap_refs(inode, need, want, 0,
> -                              (nonblock ? NON_BLOCKING : 0), got);
> +                              nonblock ? NON_BLOCKING : 0, got);
>         return ret == -EAGAIN ? 0 : ret;
>  }
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 38eb9dd5062b..ef3dd6fe2f4d 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -699,6 +699,7 @@ void ceph_mdsc_release_request(struct kref *kref)
>         struct ceph_mds_request *req = container_of(kref,
>                                                     struct ceph_mds_request,
>                                                     r_kref);
> +       ceph_mdsc_release_dir_caps(req);

I think we can do this in complete_request()

>         destroy_reply_info(&req->r_reply_info);
>         if (req->r_request)
>                 ceph_msg_put(req->r_request);
> @@ -3280,6 +3281,17 @@ static void handle_session(struct ceph_mds_session *session,
>         return;
>  }
>
> +void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
> +{
> +       int dcaps;
> +
> +       dcaps = xchg(&req->r_dir_caps, 0);
> +       if (dcaps) {
> +               dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
> +               ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
> +       }
> +}
> +
>  /*
>   * called under session->mutex.
>   */
> @@ -3307,9 +3319,14 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
>                         continue;
>                 if (req->r_attempts == 0)
>                         continue; /* only old requests */
> -               if (req->r_session &&
> -                   req->r_session->s_mds == session->s_mds)
> -                       __send_request(mdsc, session, req, true);
> +               if (!req->r_session)
> +                       continue;
> +               if (req->r_session->s_mds != session->s_mds)
> +                       continue;
> +
> +               ceph_mdsc_release_dir_caps(req);
> +
> +               __send_request(mdsc, session, req, true);
>         }
>         mutex_unlock(&mdsc->mutex);
>  }
> @@ -3393,7 +3410,7 @@ static int send_reconnect_partial(struct ceph_reconnect_state *recon_state)
>  /*
>   * Encode information about a cap for a reconnect with the MDS.
>   */
> -static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
> +static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
>                           void *arg)
>  {
>         union {
> @@ -3416,6 +3433,10 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
>         cap->mseq = 0;       /* and migrate_seq */
>         cap->cap_gen = cap->session->s_cap_gen;
>
> +       /* These are lost when the session goes away */
> +       if (S_ISDIR(inode->i_mode))
> +               cap->issued &= ~CEPH_CAP_ANY_DIR_OPS;
> +
>         if (recon_state->msg_version >= 2) {
>                 rec.v2.cap_id = cpu_to_le64(cap->cap_id);
>                 rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
> @@ -3712,7 +3733,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
>                 recon_state.msg_version = 2;
>         }
>         /* trsaverse this session's caps */
> -       err = ceph_iterate_session_caps(session, encode_caps_cb, &recon_state);
> +       err = ceph_iterate_session_caps(session, reconnect_caps_cb, &recon_state);
>
>         spin_lock(&session->s_cap_lock);
>         session->s_cap_reconnect = 0;
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 8043f2b439b1..f10d342ea585 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -284,8 +284,11 @@ struct ceph_mds_request {
>         struct ceph_msg  *r_request;  /* original request */
>         struct ceph_msg  *r_reply;
>         struct ceph_mds_reply_info_parsed r_reply_info;
> -       struct page *r_locked_page;
>         int r_err;
> +
> +
> +       struct page *r_locked_page;
> +       int r_dir_caps;
>         int r_num_caps;
>         u32               r_readdir_offset;
>
> @@ -489,6 +492,7 @@ extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
>  extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
>                                 struct inode *dir,
>                                 struct ceph_mds_request *req);
> +extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req);
>  static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
>  {
>         kref_get(&req->r_kref);
> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
> index 94cc4b047987..91d09cf37649 100644
> --- a/include/linux/ceph/ceph_fs.h
> +++ b/include/linux/ceph/ceph_fs.h
> @@ -663,6 +663,12 @@ int ceph_flags_to_mode(int flags);
>  #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
>                         CEPH_LOCK_IXATTR)
>
> +/* cap masks for async dir operations */
> +#define CEPH_CAP_DIR_CREATE    CEPH_CAP_FILE_CACHE
> +#define CEPH_CAP_DIR_UNLINK    CEPH_CAP_FILE_RD
> +#define CEPH_CAP_ANY_DIR_OPS   (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | \
> +                                CEPH_CAP_FILE_WREXTEND | CEPH_CAP_FILE_LAZYIO)
> +
>  int ceph_caps_for_mode(int mode);
>
>  enum {
> --
> 2.24.1
>



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux