Re: [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, May 26, 2020 at 11:42 AM Xiubo Li <xiubli@xxxxxxxxxx> wrote:
>
> On 2020/5/26 11:11, Yan, Zheng wrote:
> > On 5/25/20 7:16 PM, xiubli@xxxxxxxxxx wrote:
> >> From: Xiubo Li <xiubli@xxxxxxxxxx>
> >>
> >> In the ceph_check_caps() it may call the session lock/unlock stuff.
> >>
> >> There have some deadlock cases, like:
> >> handle_forward()
> >> ...
> >> mutex_lock(&mdsc->mutex)
> >> ...
> >> ceph_mdsc_put_request()
> >>    --> ceph_mdsc_release_request()
> >>      --> ceph_put_cap_request()
> >>        --> ceph_put_cap_refs()
> >>          --> ceph_check_caps()
> >> ...
> >> mutex_unlock(&mdsc->mutex)
> >>
> >> And also there maybe has some double session lock cases, like:
> >>
> >> send_mds_reconnect()
> >> ...
> >> mutex_lock(&session->s_mutex);
> >> ...
> >>    --> replay_unsafe_requests()
> >>      --> ceph_mdsc_release_dir_caps()
> >>        --> ceph_put_cap_refs()
> >>          --> ceph_check_caps()
> >> ...
> >> mutex_unlock(&session->s_mutex);
> >>
> >> URL: https://tracker.ceph.com/issues/45635
> >> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> >> ---
> >>   fs/ceph/caps.c       | 29 +++++++++++++++++++++++++++++
> >>   fs/ceph/inode.c      |  3 +++
> >>   fs/ceph/mds_client.c | 12 +++++++-----
> >>   fs/ceph/super.h      |  5 +++++
> >>   4 files changed, 44 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> >> index 27c2e60..aea66c1 100644
> >> --- a/fs/ceph/caps.c
> >> +++ b/fs/ceph/caps.c
> >> @@ -3082,6 +3082,35 @@ void ceph_put_cap_refs(struct ceph_inode_info
> >> *ci, int had)
> >>           iput(inode);
> >>   }
> >>   +void ceph_async_put_cap_refs_work(struct work_struct *work)
> >> +{
> >> +    struct ceph_inode_info *ci = container_of(work, struct
> >> ceph_inode_info,
> >> +                          put_cap_refs_work);
> >> +    int caps;
> >> +
> >> +    spin_lock(&ci->i_ceph_lock);
> >> +    caps = xchg(&ci->pending_put_caps, 0);
> >> +    spin_unlock(&ci->i_ceph_lock);
> >> +
> >> +    ceph_put_cap_refs(ci, caps);
> >> +}
> >> +
> >> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
> >> +{
> >> +    struct inode *inode = &ci->vfs_inode;
> >> +
> >> +    spin_lock(&ci->i_ceph_lock);
> >> +    if (ci->pending_put_caps & had) {
> >> +            spin_unlock(&ci->i_ceph_lock);
> >> +        return;
> >> +    }
> >
> > this will cause cap ref leak.
>
> Ah, yeah, right.
>
>
> >
> > I thought about this again. all the trouble is caused by calling
> > ceph_mdsc_release_dir_caps() inside ceph_mdsc_release_request().
>
> And also in ceph_mdsc_release_request() it is calling
> ceph_put_cap_refs() directly in other 3 places.
>

putting CEPH_CAP_PIN does not trigger check_caps(). So only
ceph_mdsc_release_dir_caps() matters.

> BRs
>
> Xiubo
>
> > We already call ceph_mdsc_release_dir_caps() in ceph_async_foo_cb()
> > for normal circumdtance. We just need to call
> > ceph_mdsc_release_dir_caps() in 'session closed' case (we never abort
> > async request). In the 'session closed' case, we can use
> > ceph_put_cap_refs_no_check_caps()
> >
> > Regards
> > Yan, Zheng
> >
> >> +
> >> +    ci->pending_put_caps |= had;
> >> +    spin_unlock(&ci->i_ceph_lock);
> >> +
> >> +    queue_work(ceph_inode_to_client(inode)->inode_wq,
> >> +           &ci->put_cap_refs_work);
> >> +}
> >>   /*
> >>    * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
> >>    * context.  Adjust per-snap dirty page accounting as appropriate.
> >> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> >> index 357c937..303276a 100644
> >> --- a/fs/ceph/inode.c
> >> +++ b/fs/ceph/inode.c
> >> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block
> >> *sb)
> >>       INIT_LIST_HEAD(&ci->i_snap_realm_item);
> >>       INIT_LIST_HEAD(&ci->i_snap_flush_item);
> >>   +    INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
> >> +    ci->pending_put_caps = 0;
> >> +
> >>       INIT_WORK(&ci->i_work, ceph_inode_work);
> >>       ci->i_work_mask = 0;
> >>       memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
> >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> >> index 0e0ab01..40b31da 100644
> >> --- a/fs/ceph/mds_client.c
> >> +++ b/fs/ceph/mds_client.c
> >> @@ -811,12 +811,14 @@ void ceph_mdsc_release_request(struct kref *kref)
> >>       if (req->r_reply)
> >>           ceph_msg_put(req->r_reply);
> >>       if (req->r_inode) {
> >> -        ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
> >> +        ceph_async_put_cap_refs(ceph_inode(req->r_inode),
> >> +                    CEPH_CAP_PIN);
> >>           /* avoid calling iput_final() in mds dispatch threads */
> >>           ceph_async_iput(req->r_inode);
> >>       }
> >>       if (req->r_parent) {
> >> -        ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
> >> +        ceph_async_put_cap_refs(ceph_inode(req->r_parent),
> >> +                    CEPH_CAP_PIN);
> >>           ceph_async_iput(req->r_parent);
> >>       }
> >>       ceph_async_iput(req->r_target_inode);
> >> @@ -831,8 +833,8 @@ void ceph_mdsc_release_request(struct kref *kref)
> >>            * changed between the dir mutex being dropped and
> >>            * this request being freed.
> >>            */
> >> -        ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
> >> -                  CEPH_CAP_PIN);
> >> + ceph_async_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
> >> +                    CEPH_CAP_PIN);
> >>           ceph_async_iput(req->r_old_dentry_dir);
> >>       }
> >>       kfree(req->r_path1);
> >> @@ -3398,7 +3400,7 @@ void ceph_mdsc_release_dir_caps(struct
> >> ceph_mds_request *req)
> >>       dcaps = xchg(&req->r_dir_caps, 0);
> >>       if (dcaps) {
> >>           dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
> >> -        ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
> >> +        ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
> >>       }
> >>   }
> >>   diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> >> index 226f19c..01d206f 100644
> >> --- a/fs/ceph/super.h
> >> +++ b/fs/ceph/super.h
> >> @@ -421,6 +421,9 @@ struct ceph_inode_info {
> >>       struct timespec64 i_btime;
> >>       struct timespec64 i_snap_btime;
> >>   +    struct work_struct put_cap_refs_work;
> >> +    int pending_put_caps;
> >> +
> >>       struct work_struct i_work;
> >>       unsigned long  i_work_mask;
> >>   @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct
> >> ceph_inode_info *ci, int caps,
> >>                   bool snap_rwsem_locked);
> >>   extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
> >>   extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
> >> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int
> >> had);
> >> +extern void ceph_async_put_cap_refs_work(struct work_struct *work);
> >>   extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci,
> >> int nr,
> >>                          struct ceph_snap_context *snapc);
> >>   extern void ceph_flush_snaps(struct ceph_inode_info *ci,
> >>
> >
>



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux