On Tue, Nov 8, 2022 at 6:50 AM <xiubli@xxxxxxxxxx> wrote:
>
> From: Xiubo Li <xiubli@xxxxxxxxxx>
>
> The request's r_session maybe changed when it was forwarded or
> resent.
>
> Cc: stable@xxxxxxxxxxxxxxx
> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955
> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> ---
>  fs/ceph/caps.c | 88 +++++++++++++++++++------------------------------
>  1 file changed, 33 insertions(+), 55 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 894adfb4a092..172f18f7459d 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2297,8 +2297,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>  	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct ceph_mds_request *req1 = NULL, *req2 = NULL;
> +	struct ceph_mds_session *s, **sessions = NULL;

Hi Xiubo,

Nit: mixing pointers and double pointers coupled with differing
initialization is generally frowned upon.  Keep it on two lines as
before:

    struct ceph_mds_session **sessions = NULL;
    struct ceph_mds_session *s;

>  	unsigned int max_sessions;
> -	int ret, err = 0;
> +	int i, ret, err = 0;
>
>  	spin_lock(&ci->i_unsafe_lock);
>  	if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
> @@ -2315,31 +2316,22 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>  	}
>  	spin_unlock(&ci->i_unsafe_lock);
>
> -	/*
> -	 * The mdsc->max_sessions is unlikely to be changed
> -	 * mostly, here we will retry it by reallocating the
> -	 * sessions array memory to get rid of the mdsc->mutex
> -	 * lock.
> -	 */
> -retry:
> -	max_sessions = mdsc->max_sessions;
> -
>  	/*
>  	 * Trigger to flush the journal logs in all the relevant MDSes
>  	 * manually, or in the worst case we must wait at most 5 seconds
>  	 * to wait the journal logs to be flushed by the MDSes periodically.
>  	 */
> +	mutex_lock(&mdsc->mutex);
> +	max_sessions = mdsc->max_sessions;
> +	sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
> +	if (!sessions) {
> +		mutex_unlock(&mdsc->mutex);
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
>  	if ((req1 || req2) && likely(max_sessions)) {

Just curious, when can max_sessions be zero here?

> -		struct ceph_mds_session **sessions = NULL;
> -		struct ceph_mds_session *s;
>  		struct ceph_mds_request *req;
> -		int i;
> -
> -		sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
> -		if (!sessions) {
> -			err = -ENOMEM;
> -			goto out;
> -		}
>
>  		spin_lock(&ci->i_unsafe_lock);
>  		if (req1) {
> @@ -2348,16 +2340,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>  				s = req->r_session;
>  				if (!s)
>  					continue;
> -				if (unlikely(s->s_mds >= max_sessions)) {
> -					spin_unlock(&ci->i_unsafe_lock);
> -					for (i = 0; i < max_sessions; i++) {
> -						s = sessions[i];
> -						if (s)
> -							ceph_put_mds_session(s);
> -					}
> -					kfree(sessions);
> -					goto retry;
> -				}
> +				if (unlikely(s->s_mds >= max_sessions))
> +					continue;

Nit: this could be combined with the previous condition:

    if (!s || unlikely(s->s_mds >= max_sessions))
            continue;

>  				if (!sessions[s->s_mds]) {
>  					s = ceph_get_mds_session(s);
>  					sessions[s->s_mds] = s;
> @@ -2370,16 +2354,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>  				s = req->r_session;
>  				if (!s)
>  					continue;
> -				if (unlikely(s->s_mds >= max_sessions)) {
> -					spin_unlock(&ci->i_unsafe_lock);
> -					for (i = 0; i < max_sessions; i++) {
> -						s = sessions[i];
> -						if (s)
> -							ceph_put_mds_session(s);
> -					}
> -					kfree(sessions);
> -					goto retry;
> -				}
> +				if (unlikely(s->s_mds >= max_sessions))
> +					continue;

ditto

>  				if (!sessions[s->s_mds]) {
>  					s = ceph_get_mds_session(s);
>  					sessions[s->s_mds] = s;
> @@ -2387,25 +2363,26 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>  			}
>  		}
>  		spin_unlock(&ci->i_unsafe_lock);
> +	}
> +	mutex_unlock(&mdsc->mutex);
>
> -		/* the auth MDS */
> -		spin_lock(&ci->i_ceph_lock);
> -		if (ci->i_auth_cap) {
> -			s = ci->i_auth_cap->session;
> -			if (!sessions[s->s_mds])
> -				sessions[s->s_mds] = ceph_get_mds_session(s);
> -		}
> -		spin_unlock(&ci->i_ceph_lock);
> +	/* the auth MDS */
> +	spin_lock(&ci->i_ceph_lock);

Why was this "auth MDS" block moved outside of the max_sessions > 0
branch?  Logically, it very much belongs there.  Is there a problem
with taking ci->i_ceph_lock under mdsc->mutex?

> +	if (ci->i_auth_cap) {
> +		s = ci->i_auth_cap->session;
> +		if (!sessions[s->s_mds] &&
> +		    likely(s->s_mds < max_sessions))

This is wrong: s->s_mds must be checked against max_sessions before
indexing into the sessions array.  Also, the entire condition should
fit on a single line.

> +			sessions[s->s_mds] = ceph_get_mds_session(s);
> +	}
> +	spin_unlock(&ci->i_ceph_lock);
>
> -		/* send flush mdlog request to MDSes */
> -		for (i = 0; i < max_sessions; i++) {
> -			s = sessions[i];
> -			if (s) {
> -				send_flush_mdlog(s);
> -				ceph_put_mds_session(s);
> -			}
> +	/* send flush mdlog request to MDSes */
> +	for (i = 0; i < max_sessions; i++) {
> +		s = sessions[i];
> +		if (s) {
> +			send_flush_mdlog(s);
> +			ceph_put_mds_session(s);
>  		}
> -		kfree(sessions);
>  	}
>
>  	dout("%s %p wait on tid %llu %llu\n", __func__,
> @@ -2428,6 +2405,7 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>  		ceph_mdsc_put_request(req1);
>  	if (req2)
>  		ceph_mdsc_put_request(req2);
> +	kfree(sessions);

Nit: since the sessions array is allocated after references to req1
and req2 are grabbed, I would free it before these references are put.

Thanks,

                Ilya