On Tue, May 26, 2020 at 11:42 AM Xiubo Li <xiubli@xxxxxxxxxx> wrote: > > On 2020/5/26 11:11, Yan, Zheng wrote: > > On 5/25/20 7:16 PM, xiubli@xxxxxxxxxx wrote: > >> From: Xiubo Li <xiubli@xxxxxxxxxx> > >> > >> In the ceph_check_caps() it may call the session lock/unlock stuff. > >> > >> There have some deadlock cases, like: > >> handle_forward() > >> ... > >> mutex_lock(&mdsc->mutex) > >> ... > >> ceph_mdsc_put_request() > >> --> ceph_mdsc_release_request() > >> --> ceph_put_cap_request() > >> --> ceph_put_cap_refs() > >> --> ceph_check_caps() > >> ... > >> mutex_unlock(&mdsc->mutex) > >> > >> And also there maybe has some double session lock cases, like: > >> > >> send_mds_reconnect() > >> ... > >> mutex_lock(&session->s_mutex); > >> ... > >> --> replay_unsafe_requests() > >> --> ceph_mdsc_release_dir_caps() > >> --> ceph_put_cap_refs() > >> --> ceph_check_caps() > >> ... > >> mutex_unlock(&session->s_mutex); > >> > >> URL: https://tracker.ceph.com/issues/45635 > >> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> > >> --- > >> fs/ceph/caps.c | 29 +++++++++++++++++++++++++++++ > >> fs/ceph/inode.c | 3 +++ > >> fs/ceph/mds_client.c | 12 +++++++----- > >> fs/ceph/super.h | 5 +++++ > >> 4 files changed, 44 insertions(+), 5 deletions(-) > >> > >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > >> index 27c2e60..aea66c1 100644 > >> --- a/fs/ceph/caps.c > >> +++ b/fs/ceph/caps.c > >> @@ -3082,6 +3082,35 @@ void ceph_put_cap_refs(struct ceph_inode_info > >> *ci, int had) > >> iput(inode); > >> } > >> +void ceph_async_put_cap_refs_work(struct work_struct *work) > >> +{ > >> + struct ceph_inode_info *ci = container_of(work, struct > >> ceph_inode_info, > >> + put_cap_refs_work); > >> + int caps; > >> + > >> + spin_lock(&ci->i_ceph_lock); > >> + caps = xchg(&ci->pending_put_caps, 0); > >> + spin_unlock(&ci->i_ceph_lock); > >> + > >> + ceph_put_cap_refs(ci, caps); > >> +} > >> + > >> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had) > >> +{ > >> + struct inode *inode = &ci->vfs_inode; > >> + > >> + spin_lock(&ci->i_ceph_lock); > >> + if (ci->pending_put_caps & had) { > >> + spin_unlock(&ci->i_ceph_lock); > >> + return; > >> + } > > > > this will cause cap ref leak. > > Ah, yeah, right. > > > > > > I thought about this again. all the trouble is caused by calling > > ceph_mdsc_release_dir_caps() inside ceph_mdsc_release_request(). > > And also in ceph_mdsc_release_request() it is calling > ceph_put_cap_refs() directly in other 3 places. > putting CEPH_CAP_PIN does not trigger check_caps(). So only ceph_mdsc_release_dir_caps() matters. > BRs > > Xiubo > > > We already call ceph_mdsc_release_dir_caps() in ceph_async_foo_cb() > > for normal circumdtance. We just need to call > > ceph_mdsc_release_dir_caps() in 'session closed' case (we never abort > > async request). In the 'session closed' case, we can use > > ceph_put_cap_refs_no_check_caps() > > > > Regards > > Yan, Zheng > > > >> + > >> + ci->pending_put_caps |= had; > >> + spin_unlock(&ci->i_ceph_lock); > >> + > >> + queue_work(ceph_inode_to_client(inode)->inode_wq, > >> + &ci->put_cap_refs_work); > >> +} > >> /* > >> * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap > >> * context. Adjust per-snap dirty page accounting as appropriate. > >> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > >> index 357c937..303276a 100644 > >> --- a/fs/ceph/inode.c > >> +++ b/fs/ceph/inode.c > >> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block > >> *sb) > >> INIT_LIST_HEAD(&ci->i_snap_realm_item); > >> INIT_LIST_HEAD(&ci->i_snap_flush_item); > >> + INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work); > >> + ci->pending_put_caps = 0; > >> + > >> INIT_WORK(&ci->i_work, ceph_inode_work); > >> ci->i_work_mask = 0; > >> memset(&ci->i_btime, '\0', sizeof(ci->i_btime)); > >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > >> index 0e0ab01..40b31da 100644 > >> --- a/fs/ceph/mds_client.c > >> +++ b/fs/ceph/mds_client.c > >> @@ -811,12 +811,14 @@ void ceph_mdsc_release_request(struct kref *kref) > >> if (req->r_reply) > >> ceph_msg_put(req->r_reply); > >> if (req->r_inode) { > >> - ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); > >> + ceph_async_put_cap_refs(ceph_inode(req->r_inode), > >> + CEPH_CAP_PIN); > >> /* avoid calling iput_final() in mds dispatch threads */ > >> ceph_async_iput(req->r_inode); > >> } > >> if (req->r_parent) { > >> - ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); > >> + ceph_async_put_cap_refs(ceph_inode(req->r_parent), > >> + CEPH_CAP_PIN); > >> ceph_async_iput(req->r_parent); > >> } > >> ceph_async_iput(req->r_target_inode); > >> @@ -831,8 +833,8 @@ void ceph_mdsc_release_request(struct kref *kref) > >> * changed between the dir mutex being dropped and > >> * this request being freed. > >> */ > >> - ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), > >> - CEPH_CAP_PIN); > >> + ceph_async_put_cap_refs(ceph_inode(req->r_old_dentry_dir), > >> + CEPH_CAP_PIN); > >> ceph_async_iput(req->r_old_dentry_dir); > >> } > >> kfree(req->r_path1); > >> @@ -3398,7 +3400,7 @@ void ceph_mdsc_release_dir_caps(struct > >> ceph_mds_request *req) > >> dcaps = xchg(&req->r_dir_caps, 0); > >> if (dcaps) { > >> dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); > >> - ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps); > >> + ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps); > >> } > >> } > >> diff --git a/fs/ceph/super.h b/fs/ceph/super.h > >> index 226f19c..01d206f 100644 > >> --- a/fs/ceph/super.h > >> +++ b/fs/ceph/super.h > >> @@ -421,6 +421,9 @@ struct ceph_inode_info { > >> struct timespec64 i_btime; > >> struct timespec64 i_snap_btime; > >> + struct work_struct put_cap_refs_work; > >> + int pending_put_caps; > >> + > >> struct work_struct i_work; > >> unsigned long i_work_mask; > >> @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct > >> ceph_inode_info *ci, int caps, > >> bool snap_rwsem_locked); > >> extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); > >> extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); > >> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int > >> had); > >> +extern void ceph_async_put_cap_refs_work(struct work_struct *work); > >> extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, > >> int nr, > >> struct ceph_snap_context *snapc); > >> extern void ceph_flush_snaps(struct ceph_inode_info *ci, > >> > > >