From: Xiubo Li <xiubli@xxxxxxxxxx> The reconnect feature has never been supported by the MDS when it is not in the RECONNECT state. These reconnect requests will incorrectly close the just-reopened sessions when the MDS kills them while the "mds_session_blocklist_on_evict" option is disabled. Fixes: 7e70f0ed9f3e ("ceph: attempt mds reconnect if mds closes our session") URL: https://tracker.ceph.com/issues/65647 Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> --- V2: - Try to clean up the sessions and retry the requests. fs/ceph/mds_client.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f5b25d178118..50c06a03b5fe 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -6241,9 +6241,40 @@ static void mds_peer_reset(struct ceph_connection *con) pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n", s->s_mds); - if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO && - ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) >= CEPH_MDS_STATE_RECONNECT) + + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO || + ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) < CEPH_MDS_STATE_RECONNECT) + return; + + if (ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) == CEPH_MDS_STATE_RECONNECT) { send_mds_reconnect(mdsc, s); + return; + } + + mutex_lock(&s->s_mutex); + switch (s->s_state) { + case CEPH_MDS_SESSION_CLOSING: + case CEPH_MDS_SESSION_OPEN: + case CEPH_MDS_SESSION_OPENING: + mutex_lock(&mdsc->mutex); + ceph_get_mds_session(s); + __unregister_session(mdsc, s); + mutex_unlock(&mdsc->mutex); + + s->s_state = CEPH_MDS_SESSION_CLOSED; + cleanup_session_requests(mdsc, s); + remove_session_caps(s); + wake_up_all(&mdsc->session_close_wq); + + mutex_lock(&mdsc->mutex); + __wake_requests(mdsc, &s->s_waiting); + kick_requests(mdsc, s->s_mds); + mutex_unlock(&mdsc->mutex); + + ceph_put_mds_session(s); + break; + } + mutex_unlock(&s->s_mutex); } static void mds_dispatch(struct 
ceph_connection *con, struct ceph_msg *msg) -- 2.44.0