Patrick reported a case where the MDS and client had racing session messages to one another. The MDS was sending caps to the client and the client was sending a CEPH_SESSION_REQUEST_CLOSE message in order to unmount. Because they were sending at the same time, the REQUEST_CLOSE had too old a sequence number, and the MDS dropped it on the floor. On the client, this would have probably manifested as a 60s hang during umount. The MDS ended up blocklisting the client. Once we've decided to issue a REQUEST_CLOSE, we're finished with the session, so just keep sending them until the MDS acknowledges that. Change the code to retransmit a REQUEST_CLOSE every second if the session hasn't changed state yet. Give up and throw a warning after mount_timeout elapses if we haven't gotten a response. URL: https://tracker.ceph.com/issues/47563 Reported-by: Patrick Donnelly <pdonnell@xxxxxxxxxx> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> --- fs/ceph/mds_client.c | 53 ++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b07e7adf146f..d9cb74e3d5e3 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1878,7 +1878,7 @@ static int request_close_session(struct ceph_mds_session *session) static int __close_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { - if (session->s_state >= CEPH_MDS_SESSION_CLOSING) + if (session->s_state > CEPH_MDS_SESSION_CLOSING) return 0; session->s_state = CEPH_MDS_SESSION_CLOSING; return request_close_session(session); @@ -4692,38 +4692,49 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) return atomic_read(&mdsc->num_sessions) <= skipped; } +static bool umount_timed_out(unsigned long timeo) +{ + if (time_before(jiffies, timeo)) + return false; + pr_warn("ceph: unable to close all sessions\n"); + return true; +} + /* * called after sb is ro. 
*/ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { - struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_session *session; - int i; - int skipped = 0; + int i, ret; + int skipped; + unsigned long timeo = jiffies + + ceph_timeout_jiffies(mdsc->fsc->client->options->mount_timeout); dout("close_sessions\n"); /* close sessions */ - mutex_lock(&mdsc->mutex); - for (i = 0; i < mdsc->max_sessions; i++) { - session = __ceph_lookup_mds_session(mdsc, i); - if (!session) - continue; - mutex_unlock(&mdsc->mutex); - mutex_lock(&session->s_mutex); - if (__close_session(mdsc, session) <= 0) - skipped++; - mutex_unlock(&session->s_mutex); - ceph_put_mds_session(session); + do { + skipped = 0; mutex_lock(&mdsc->mutex); - } - mutex_unlock(&mdsc->mutex); + for (i = 0; i < mdsc->max_sessions; i++) { + session = __ceph_lookup_mds_session(mdsc, i); + if (!session) + continue; + mutex_unlock(&mdsc->mutex); + mutex_lock(&session->s_mutex); + if (__close_session(mdsc, session) <= 0) + skipped++; + mutex_unlock(&session->s_mutex); + ceph_put_mds_session(session); + mutex_lock(&mdsc->mutex); + } + mutex_unlock(&mdsc->mutex); - dout("waiting for sessions to close\n"); - wait_event_timeout(mdsc->session_close_wq, - done_closing_sessions(mdsc, skipped), - ceph_timeout_jiffies(opts->mount_timeout)); + dout("waiting for sessions to close\n"); + ret = wait_event_timeout(mdsc->session_close_wq, + done_closing_sessions(mdsc, skipped), HZ); + } while (!ret && !umount_timed_out(timeo)); /* tear down remaining sessions */ mutex_lock(&mdsc->mutex); -- 2.26.2