Currently, we only sync all osdc (OSD client) requests when releasing the lock. However, if we are going to support journaling, we need to wait for all img_requests to complete, not only the low-level requests in the osd_client. Signed-off-by: Dongsheng Yang <dongsheng.yang@xxxxxxxxxxxx> --- drivers/block/rbd.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4c5f36e..a583c2e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -386,6 +386,9 @@ struct rbd_device { struct list_head node; + atomic_t inflight_ios; + struct completion inflight_wait; + /* sysfs related */ struct device dev; unsigned long open_count; /* protected by lock */ @@ -1654,6 +1657,7 @@ static struct rbd_img_request *rbd_img_request_create( spin_lock_init(&img_request->completion_lock); INIT_LIST_HEAD(&img_request->object_extents); kref_init(&img_request->kref); + atomic_inc(&rbd_dev->inflight_ios); dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev, obj_op_name(op_type), img_request); @@ -1670,6 +1674,8 @@ static void rbd_img_request_destroy(struct kref *kref) dout("%s: img %p\n", __func__, img_request); + atomic_dec(&img_request->rbd_dev->inflight_ios); + complete_all(&img_request->rbd_dev->inflight_wait); for_each_obj_request_safe(img_request, obj_request, next_obj_request) rbd_img_obj_request_del(img_request, obj_request); rbd_assert(img_request->obj_request_count == 0); @@ -3075,26 +3081,39 @@ static void rbd_acquire_lock(struct work_struct *work) } } +static int rbd_inflight_wait(struct rbd_device *rbd_dev) +{ + int ret = 0; + + while (atomic_read(&rbd_dev->inflight_ios)) { + ret = wait_for_completion_interruptible(&rbd_dev->inflight_wait); + if (ret) + break; + } + + return ret; +} + /* * lock_rwsem must be held for write */ static bool rbd_release_lock(struct rbd_device *rbd_dev) { + int ret = 0; + dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev, rbd_dev->lock_state); if (rbd_dev->lock_state != 
RBD_LOCK_STATE_LOCKED) return false; rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; - downgrade_write(&rbd_dev->lock_rwsem); - /* - * Ensure that all in-flight IO is flushed. - * - * FIXME: ceph_osdc_sync() flushes the entire OSD client, which - * may be shared with other devices. - */ - ceph_osdc_sync(&rbd_dev->rbd_client->client->osdc); - up_read(&rbd_dev->lock_rwsem); + up_write(&rbd_dev->lock_rwsem); + + ret = rbd_inflight_wait(rbd_dev); + if (ret) { + down_write(&rbd_dev->lock_rwsem); + return false; + } down_write(&rbd_dev->lock_rwsem); dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev, @@ -4392,6 +4411,8 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc, INIT_LIST_HEAD(&rbd_dev->node); init_rwsem(&rbd_dev->header_rwsem); + atomic_set(&rbd_dev->inflight_ios, 0); + init_completion(&rbd_dev->inflight_wait); rbd_dev->header.data_pool_id = CEPH_NOPOOL; ceph_oid_init(&rbd_dev->header_oid); rbd_dev->header_oloc.pool = spec->pool_id; -- 1.8.3.1