[PATCH v2 14/16] rbd: append journal first before sending img_request

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



With the journaling feature enabled, we need to append an event
to the journal before sending the img_request.

Signed-off-by: Dongsheng Yang <dongsheng.yang@xxxxxxxxxxxx>
---
 drivers/block/rbd.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 196 insertions(+), 8 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index bd90c17..5b641f8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -298,6 +298,7 @@ struct rbd_img_request {
 	u32			pending_count;
 
 	struct completion	completion;
+	uint64_t		journaler_commit_tid;
 
 	struct kref		kref;
 };
@@ -441,6 +442,7 @@ struct rbd_journal {
 
 static struct kmem_cache	*rbd_img_request_cache;
 static struct kmem_cache	*rbd_obj_request_cache;
+static struct kmem_cache	*rbd_journal_ctx_cache;
 
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
@@ -2616,12 +2618,20 @@ static void rbd_img_end_child_request(struct rbd_img_request *img_req)
 static void rbd_img_end_request(struct rbd_img_request *img_req)
 {
 	rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
-	rbd_assert((!img_req->result &&
-		    img_req->xferred == blk_rq_bytes(img_req->rq)) ||
-		   (img_req->result < 0 && !img_req->xferred));
 
-	blk_mq_end_request(img_req->rq,
-			   errno_to_blk_status(img_req->result));
+	if (img_req->rq) {
+		rbd_assert((!img_req->result &&
+			    img_req->xferred == blk_rq_bytes(img_req->rq)) ||
+			   (img_req->result < 0 && !img_req->xferred));
+		blk_mq_end_request(img_req->rq,
+				   errno_to_blk_status(img_req->result));
+	}
+
+	if (img_req->journaler_commit_tid) {
+		ceph_journaler_client_committed(img_req->rbd_dev->journal->journaler,
+						img_req->journaler_commit_tid);
+	}
+
 	complete_all(&img_req->completion);
 	rbd_img_request_put(img_req);
 }
@@ -3689,6 +3699,21 @@ static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire)
 	return ret;
 }
 
+struct rbd_journal_ctx {	/* state handed from rbd_queue_workfn() to rbd_journal_callback() */
+	struct rbd_device *rbd_dev;	/* device whose lock_rwsem the callback may take */
+	struct rbd_img_request *img_request;	/* submitted by the callback when the append succeeds */
+	struct request *rq;	/* ended with an error by the callback when the append fails */
+	struct ceph_snap_context *snapc;	/* put by the callback when the append fails */
+	int result;	/* NOTE(review): not referenced anywhere in this patch -- confirm it is needed */
+	bool must_be_locked;	/* callback takes lock_rwsem for read around the submission */
+
+	struct ceph_bio_iter	bio_iter;	/* set from the request's bio and handed to the journaler ctx */
+};
+
+static int rbd_journal_append(struct rbd_device *rbd_dev, struct bio *bio,
+			      u64 offset, u64 length, enum obj_operation_type op_type,
+			      struct rbd_journal_ctx *ctx);
+
 static void rbd_queue_workfn(struct work_struct *work)
 {
 	struct request *rq = blk_mq_rq_from_pdu(work);
@@ -3794,7 +3819,29 @@ static void rbd_queue_workfn(struct work_struct *work)
 	if (result)
 		goto err_img_request;
 
-	rbd_img_request_submit(img_request);
+	if (!(rbd_dev->header.features & RBD_FEATURE_JOURNALING) ||
+		(op_type == OBJ_OP_READ)) {
+		rbd_img_request_submit(img_request);
+	} else {
+		struct rbd_journal_ctx *ctx = kmem_cache_zalloc(rbd_journal_ctx_cache, GFP_NOIO);
+
+		if (!ctx){
+			result = -ENOMEM;
+			goto err_unlock;
+		}
+
+		ctx->img_request = img_request;
+		ctx->rq = rq;
+		ctx->snapc = snapc;
+		ctx->must_be_locked = must_be_locked;
+		ctx->rbd_dev = rbd_dev;
+		result = rbd_journal_append(rbd_dev, rq->bio, offset, length, op_type, ctx);
+		if (result) {
+			rbd_warn(rbd_dev, "error in rbd_journal_append");
+			goto err_unlock;
+		}
+	}
+
 	if (must_be_locked)
 		up_read(&rbd_dev->lock_rwsem);
 	return;
@@ -5996,6 +6043,140 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
 	rbd_dev->spec->image_id = NULL;
 }
 
+/*
+ * Callback installed on the journaler ctx by the journal append helpers.
+ * If the journaler reports no error, the saved image request is submitted;
+ * otherwise the block request is ended with that error.  Both ctxs are released.
+ */
+static void rbd_journal_callback(struct ceph_journaler_ctx *journaler_ctx)
+{
+	struct rbd_journal_ctx *rbd_ctx = journaler_ctx->priv;
+	struct rbd_device *rbd_dev = rbd_ctx->rbd_dev;
+	bool take_lock = rbd_ctx->must_be_locked;
+	int err = journaler_ctx->result;
+
+	if (err) {
+		/* Append failed: drop our references and fail the block request. */
+		ceph_put_snap_context(rbd_ctx->snapc);
+		blk_mq_end_request(rbd_ctx->rq, errno_to_blk_status(err));
+		rbd_img_request_put(rbd_ctx->img_request);
+	} else {
+		if (take_lock)
+			down_read(&rbd_dev->lock_rwsem);
+		rbd_img_request_submit(rbd_ctx->img_request);
+		if (take_lock)
+			up_read(&rbd_dev->lock_rwsem);
+	}
+
+	kmem_cache_free(rbd_journal_ctx_cache, rbd_ctx);
+	ceph_journaler_ctx_put(journaler_ctx);
+}
+
+/*
+ * Queue an AIO write event on the journal.  The fixed-size event header is
+ * encoded at the tail of the journaler ctx's prefix page; the bio iterator and
+ * data length are handed to the journaler ctx.  Returns 0 or a negative errno.
+ */
+static int rbd_journal_append_write_event(struct rbd_device *rbd_dev, struct bio *bio,
+					  u64 offset, u64 length, struct rbd_journal_ctx *ctx)
+{
+	struct ceph_journaler_ctx *jctx = ceph_journaler_ctx_alloc();
+	void *pos;
+	int err;
+
+	if (!jctx)
+		return -ENOMEM;
+
+	ctx->bio_iter.bio = bio;
+	ctx->bio_iter.iter = bio->bi_iter;
+	jctx->bio_iter = &ctx->bio_iter;
+	jctx->bio_len = length;
+
+	/*
+	 * Prefix: encoding start block (6) + event type (4) + offset (8) +
+	 * length (8) + data string length (4) = 30 bytes.
+	 */
+	jctx->prefix_len = 30;
+	jctx->prefix_offset = PAGE_SIZE - jctx->prefix_len;
+	pos = page_address(jctx->prefix_page) + jctx->prefix_offset;
+
+	/* The length passed here leaves out the 6-byte start block. */
+	ceph_start_encoding(&pos, 1, 1, jctx->prefix_len + jctx->bio_len - 6);
+	ceph_encode_32(&pos, EVENT_TYPE_AIO_WRITE);
+	ceph_encode_64(&pos, offset);
+	ceph_encode_64(&pos, length);
+	/* First part of what ceph_encode_string() would emit: the length. */
+	ceph_encode_32(&pos, jctx->bio_len);
+
+	jctx->priv = ctx;
+	jctx->callback = rbd_journal_callback;
+
+	err = ceph_journaler_append(rbd_dev->journal->journaler, rbd_dev->journal->tag_tid,
+				    &ctx->img_request->journaler_commit_tid, jctx);
+	if (err)
+		ceph_journaler_ctx_put(jctx);
+
+	return err;
+}
+
+/*
+ * Queue an AIO discard event on the journal.  Only the fixed-size header is
+ * encoded into the journaler ctx's prefix page; bio_len is set to 0.
+ */
+static int rbd_journal_append_discard_event(struct rbd_device *rbd_dev, struct bio *bio,
+					    u64 offset, u64 length, struct rbd_journal_ctx *ctx)
+{
+	struct ceph_journaler_ctx *jctx = ceph_journaler_ctx_alloc();
+	void *pos;
+	int err;
+
+	if (!jctx)
+		return -ENOMEM;
+
+	ctx->bio_iter.bio = bio;
+	ctx->bio_iter.iter = bio->bi_iter;
+	jctx->bio_iter = &ctx->bio_iter;
+	jctx->bio_len = 0;
+
+	/*
+	 * Prefix: encoding start block (6) + event type (4) + offset (8) +
+	 * length (8) = 26 bytes.
+	 */
+	jctx->prefix_len = 26;
+	jctx->prefix_offset = PAGE_SIZE - jctx->prefix_len;
+	pos = page_address(jctx->prefix_page) + jctx->prefix_offset;
+
+	/* The length passed here leaves out the 6-byte start block. */
+	ceph_start_encoding(&pos, 1, 1, jctx->prefix_len + jctx->bio_len - 6);
+	ceph_encode_32(&pos, EVENT_TYPE_AIO_DISCARD);
+	ceph_encode_64(&pos, offset);
+	ceph_encode_64(&pos, length);
+
+	jctx->priv = ctx;
+	jctx->callback = rbd_journal_callback;
+
+	err = ceph_journaler_append(rbd_dev->journal->journaler, rbd_dev->journal->tag_tid,
+				    &ctx->img_request->journaler_commit_tid, jctx);
+	if (err)
+		ceph_journaler_ctx_put(jctx);
+
+	return err;
+}
+
+/* Journal a write/discard for ctx's request; returns 0 or a negative errno (-EOPNOTSUPP otherwise). */
+static int rbd_journal_append(struct rbd_device *rbd_dev, struct bio *bio, u64 offset, u64 length,
+			      enum obj_operation_type op_type, struct rbd_journal_ctx *ctx)
+{
+	switch (op_type) {
+	case OBJ_OP_WRITE:
+		return rbd_journal_append_write_event(rbd_dev, bio, offset, length, ctx);
+	case OBJ_OP_DISCARD:
+		return rbd_journal_append_discard_event(rbd_dev, bio, offset, length, ctx);
+	default:	/* no event is queued, so no callback would ever submit the request: fail it */
+		return -EOPNOTSUPP;
+	}
+}
+
 /*
  * Probe for the existence of the header object for the given rbd
  * device.  If this image is the one being mapped (i.e., not a
@@ -6369,11 +6550,18 @@ static int __init rbd_slab_init(void)
 	rbd_assert(!rbd_obj_request_cache);
 	rbd_obj_request_cache = KMEM_CACHE(rbd_obj_request, 0);
 	if (!rbd_obj_request_cache)
-		goto out_err;
+		goto destroy_img_request_cache;
 
+	rbd_assert(!rbd_journal_ctx_cache);
+	rbd_journal_ctx_cache = KMEM_CACHE(rbd_journal_ctx, 0);
+	if (!rbd_journal_ctx_cache)
+		goto destroy_obj_request_cache;
 	return 0;
 
-out_err:
+destroy_obj_request_cache:
+	kmem_cache_destroy(rbd_obj_request_cache);
+	rbd_obj_request_cache = NULL;
+destroy_img_request_cache:
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
 	return -ENOMEM;
-- 
1.8.3.1





[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux