This can happen if __close_session() in ceph_monc_stop() races with a connection reset. We need to ignore such faults, otherwise it's likely we would take !hunting, call __schedule_delayed() and end up with delayed_work() executing on invalid memory, among other things. The (two!) con->private tests are useless, as nothing ever clears con->private. Nuke them. Signed-off-by: Ilya Dryomov <idryomov@xxxxxxxxx> --- net/ceph/mon_client.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index a2b45cf79dca..cf638c009cfa 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1155,22 +1155,17 @@ static void mon_fault(struct ceph_connection *con) { struct ceph_mon_client *monc = con->private; - if (!monc) - return; - - dout("mon_fault\n"); mutex_lock(&monc->mutex); - if (!con->private) - goto out; - - if (!monc->hunting) { - dout("%s hunting for new mon\n", __func__); - reopen_session(monc); - __schedule_delayed(monc); - } else { - dout("%s already hunting\n", __func__); + dout("%s mon%d\n", __func__, monc->cur_mon); + if (monc->cur_mon >= 0) { + if (!monc->hunting) { + dout("%s hunting for new mon\n", __func__); + reopen_session(monc); + __schedule_delayed(monc); + } else { + dout("%s already hunting\n", __func__); + } } -out: mutex_unlock(&monc->mutex); } -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html