On 1/30/20 3:26 PM, Laurence Oberman wrote:
> On Thu, 2020-01-30 at 12:42 +0100, Hannes Reinecke wrote:
>> The object request list can be accessed from various contexts
>> so we need to lock it to avoid concurrent modifications and
>> random crashes.
>>
>> Signed-off-by: Hannes Reinecke <hare@xxxxxxx>
>> ---
>>  drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
>>  1 file changed, 24 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>> index 5710b2a8609c..ddc170661607 100644
>> --- a/drivers/block/rbd.c
>> +++ b/drivers/block/rbd.c
>> @@ -344,6 +344,7 @@ struct rbd_img_request {
>>
>>  	struct list_head lock_item;
>>  	struct list_head object_extents;	/* obj_req.ex structs */
>> +	struct mutex object_mutex;
>>
>>  	struct mutex state_mutex;
>>  	struct pending_result pending;
>> @@ -1664,6 +1665,7 @@ static struct rbd_img_request *rbd_img_request_create(
>>  	INIT_LIST_HEAD(&img_request->lock_item);
>>  	INIT_LIST_HEAD(&img_request->object_extents);
>>  	mutex_init(&img_request->state_mutex);
>> +	mutex_init(&img_request->object_mutex);
>>  	kref_init(&img_request->kref);
>>
>>  	return img_request;
>> @@ -1680,8 +1682,10 @@ static void rbd_img_request_destroy(struct kref *kref)
>>  	dout("%s: img %p\n", __func__, img_request);
>>
>>  	WARN_ON(!list_empty(&img_request->lock_item));
>> +	mutex_lock(&img_request->object_mutex);
>>  	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
>>  		rbd_img_obj_request_del(img_request, obj_request);
>> +	mutex_unlock(&img_request->object_mutex);
>>
>>  	if (img_request_layered_test(img_request)) {
>>  		img_request_layered_clear(img_request);
>> @@ -2486,6 +2490,7 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
>>  	struct rbd_obj_request *obj_req, *next_obj_req;
>>  	int ret;
>>
>> +	mutex_lock(&img_req->object_mutex);
>>  	for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
>>  		switch (img_req->op_type) {
>>  		case OBJ_OP_READ:
>> @@ -2510,7 +2515,7 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
>>  			continue;
>>  		}
>>  	}
>> -
>> +	mutex_unlock(&img_req->object_mutex);
>>  	img_req->state = RBD_IMG_START;
>>  	return 0;
>>  }
>> @@ -2569,6 +2574,7 @@ static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
>>  	 * position in the provided bio (list) or bio_vec array.
>>  	 */
>>  	fctx->iter = *fctx->pos;
>> +	mutex_lock(&img_req->object_mutex);
>>  	for (i = 0; i < num_img_extents; i++) {
>>  		ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
>>  					   img_extents[i].fe_off,
>> @@ -2576,10 +2582,12 @@ static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
>>  					   &img_req->object_extents,
>>  					   alloc_object_extent, img_req,
>>  					   fctx->set_pos_fn, &fctx->iter);
>> -		if (ret)
>> +		if (ret) {
>> +			mutex_unlock(&img_req->object_mutex);
>>  			return ret;
>> +		}
>>  	}
>> -
>> +	mutex_unlock(&img_req->object_mutex);
>>  	return __rbd_img_fill_request(img_req);
>>  }
>>
>> @@ -2620,6 +2628,7 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
>>  	 * or bio_vec array because when mapped, those bio_vecs can straddle
>>  	 * stripe unit boundaries.
>>  	 */
>> +	mutex_lock(&img_req->object_mutex);
>>  	fctx->iter = *fctx->pos;
>>  	for (i = 0; i < num_img_extents; i++) {
>>  		ret = ceph_file_to_extents(&rbd_dev->layout,
>> @@ -2629,15 +2638,17 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
>>  					   alloc_object_extent, img_req,
>>  					   fctx->count_fn, &fctx->iter);
>>  		if (ret)
>> -			return ret;
>> +			goto out_unlock;
>>  	}
>>
>>  	for_each_obj_request(img_req, obj_req) {
>>  		obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
>>  					      sizeof(*obj_req->bvec_pos.bvecs),
>>  					      GFP_NOIO);
>> -		if (!obj_req->bvec_pos.bvecs)
>> -			return -ENOMEM;
>> +		if (!obj_req->bvec_pos.bvecs) {
>> +			ret = -ENOMEM;
>> +			goto out_unlock;
>> +		}
>>  	}
>>
>>  	/*
>> @@ -2652,10 +2663,14 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
>>  					   &img_req->object_extents,
>>  					   fctx->copy_fn, &fctx->iter);
>>  		if (ret)
>> -			return ret;
>> +			goto out_unlock;
>>  	}
>> +	mutex_unlock(&img_req->object_mutex);
>>
>>  	return __rbd_img_fill_request(img_req);
>> +out_unlock:
>> +	mutex_unlock(&img_req->object_mutex);
>> +	return ret;
>>  }
>>
>>  static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
>> @@ -3552,6 +3567,7 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
>>
>>  	rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
>>
>> +	mutex_lock(&img_req->object_mutex);
>>  	for_each_obj_request(img_req, obj_req) {
>>  		int result = 0;
>>
>> @@ -3564,6 +3580,7 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
>>  			img_req->pending.num_pending++;
>>  		}
>>  	}
>> +	mutex_unlock(&img_req->object_mutex);
>>  }
>>
>>  static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
>
> Looks good to me. Just wonder how we escaped this for so long.
>
> Reviewed-by: Laurence Oberman <loberman@xxxxxxxxxx>
>
The whole state machine is utterly fragile. I'll be posting a patchset
to clean stuff up somewhat, but it's still a beast.
I'm rather surprised that it doesn't break more often ...

Cheers,

Hannes
--
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare@xxxxxxx                              +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
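For reference, the pattern the patch applies — a single mutex guarding a
request list that several contexts walk, extend, and tear down — can be
sketched in plain userspace C as below. This is only an illustrative
sketch, not rbd.c code: the structure and function names are made up, and
a pthread mutex stands in for the kernel's struct mutex. Build with
"cc -pthread sketch.c".

/*
 * Minimal sketch of the locking pattern: every walk or modification of
 * the object list happens under object_mutex, mirroring how the patch
 * brackets list traversals with mutex_lock()/mutex_unlock().
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct obj_request {
	int id;
	struct obj_request *next;
};

struct img_request {
	struct obj_request *objects;	/* stands in for object_extents */
	pthread_mutex_t object_mutex;	/* stands in for object_mutex   */
};

static void img_request_init(struct img_request *img)
{
	img->objects = NULL;
	pthread_mutex_init(&img->object_mutex, NULL);
}

/* Add an object request under the lock, so concurrent walkers are safe. */
static int img_add_object(struct img_request *img, int id)
{
	struct obj_request *obj = malloc(sizeof(*obj));

	if (!obj)
		return -1;
	obj->id = id;
	pthread_mutex_lock(&img->object_mutex);
	obj->next = img->objects;
	img->objects = obj;
	pthread_mutex_unlock(&img->object_mutex);
	return 0;
}

/* Walk and free every object request under the same lock. */
static void img_destroy_objects(struct img_request *img)
{
	struct obj_request *obj, *next;

	pthread_mutex_lock(&img->object_mutex);
	for (obj = img->objects; obj; obj = next) {
		next = obj->next;
		free(obj);
	}
	img->objects = NULL;
	pthread_mutex_unlock(&img->object_mutex);
}

int main(void)
{
	struct img_request img;

	img_request_init(&img);
	img_add_object(&img, 1);
	img_add_object(&img, 2);
	img_destroy_objects(&img);
	printf("object list torn down under object_mutex\n");
	return 0;
}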