From: Xiubo Li <xiubli@xxxxxxxxxx> The type of 'num_fwd' in ceph 'MClientRequestForward' is 'int32_t', while in 'ceph_mds_request_head' the type is '__u8'. So in case the request bounces between MDSes exceeding 256 times, the client will get stuck. In this case it's ususally a bug in MDS and continue bouncing the request makes no sense. Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> --- fs/ceph/mds_client.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a89ee866ebbb..0bb6e7bc499c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3293,6 +3293,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, int err = -EINVAL; void *p = msg->front.iov_base; void *end = p + msg->front.iov_len; + bool aborted = false; ceph_decode_need(&p, end, 2*sizeof(u32), bad); next_mds = ceph_decode_32(&p); @@ -3309,8 +3310,28 @@ static void handle_forward(struct ceph_mds_client *mdsc, dout("forward tid %llu aborted, unregistering\n", tid); __unregister_request(mdsc, req); } else if (fwd_seq <= req->r_num_fwd) { - dout("forward tid %llu to mds%d - old seq %d <= %d\n", - tid, next_mds, req->r_num_fwd, fwd_seq); + /* + * The type of 'num_fwd' in ceph 'MClientRequestForward' + * is 'int32_t', while in 'ceph_mds_request_head' the + * type is '__u8'. So in case the request bounces between + * MDSes exceeding 256 times, the client will get stuck. + * + * In this case it's ususally a bug in MDS and continue + * bouncing the request makes no sense. + */ + if (req->r_num_fwd == 256) { + mutex_lock(&req->r_fill_mutex); + req->r_err = -EIO; + set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); + mutex_unlock(&req->r_fill_mutex); + aborted = true; + dout("forward tid %llu to mds%d - seq overflowed %d <= %d\n", + tid, next_mds, req->r_num_fwd, fwd_seq); + goto out; + } else { + dout("forward tid %llu to mds%d - old seq %d <= %d\n", + tid, next_mds, req->r_num_fwd, fwd_seq); + } } else { /* resend. forward race not possible; mds would drop */ dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); @@ -3322,9 +3343,13 @@ static void handle_forward(struct ceph_mds_client *mdsc, put_request_session(req); __do_request(mdsc, req); } - ceph_mdsc_put_request(req); out: mutex_unlock(&mdsc->mutex); + + /* kick calling process */ + if (aborted) + complete_request(mdsc, req); + ceph_mdsc_put_request(req); return; bad: -- 2.27.0