[PATCH v2 15/16] rbd: replay events in journal

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



when we found uncommitted events in journal, we need to do a replay.
This commit only implement three kinds of events replaying:

EVENT_TYPE_AIO_DISCARD:
	Will send a img_request to image with OBJ_OP_DISCARD, and
        wait for it completed.
EVENT_TYPE_AIO_WRITE:
	Will send a img_request to image with OBJ_OP_WRITE, and
        wait for it completed.
EVENT_TYPE_AIO_FLUSH:
	As all other events are replayed in synchoronized way, that
	means the events before are all flushed. we did nothing for this event.

Signed-off-by: Dongsheng Yang <dongsheng.yang@xxxxxxxxxxxx>
---
 drivers/block/rbd.c | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5b641f8..6f31734 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -5832,6 +5832,190 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
 	return ret;
 }
 
+enum rbd_journal_event_type {
+  EVENT_TYPE_AIO_DISCARD           = 0,
+  EVENT_TYPE_AIO_WRITE             = 1,
+  EVENT_TYPE_AIO_FLUSH             = 2,
+  EVENT_TYPE_OP_FINISH             = 3,
+  EVENT_TYPE_SNAP_CREATE           = 4,
+  EVENT_TYPE_SNAP_REMOVE           = 5,
+  EVENT_TYPE_SNAP_RENAME           = 6,
+  EVENT_TYPE_SNAP_PROTECT          = 7,
+  EVENT_TYPE_SNAP_UNPROTECT        = 8,
+  EVENT_TYPE_SNAP_ROLLBACK         = 9,
+  EVENT_TYPE_RENAME                = 10,
+  EVENT_TYPE_RESIZE                = 11,
+  EVENT_TYPE_FLATTEN               = 12,
+  EVENT_TYPE_DEMOTE_PROMOTE        = 13,
+  EVENT_TYPE_SNAP_LIMIT            = 14,
+  EVENT_TYPE_UPDATE_FEATURES       = 15,
+  EVENT_TYPE_METADATA_SET          = 16,
+  EVENT_TYPE_METADATA_REMOVE       = 17,
+  EVENT_TYPE_AIO_WRITESAME         = 18,
+  EVENT_TYPE_AIO_COMPARE_AND_WRITE = 19,
+};
+
+static struct bio_vec *setup_write_bvecs(void *buf, u64 offset, u64 length)
+{
+	u32 i;
+	struct bio_vec *bvecs = NULL;
+	u32 bvec_count = 0;
+
+	bvec_count = calc_pages_for(offset, length);
+	bvecs = kcalloc(bvec_count, sizeof(*bvecs), GFP_NOIO);
+	if (!bvecs)
+		goto err;
+
+	offset = offset % PAGE_SIZE;
+	for (i = 0; i < bvec_count; i++) {
+		unsigned int len = min(length, (u64)PAGE_SIZE - offset);
+
+		bvecs[i].bv_page = alloc_page(GFP_NOIO);
+		if (!bvecs[i].bv_page)
+			goto free_bvecs;
+
+		bvecs[i].bv_offset = offset;
+		bvecs[i].bv_len = len;
+		memcpy(page_address(bvecs[i].bv_page) + bvecs[i].bv_offset, buf, bvecs[i].bv_len);
+		length -= len;
+		buf += len;
+		offset = 0;
+	}
+
+	rbd_assert(!length);
+
+	return bvecs;
+
+free_bvecs:
+err:
+	return NULL;
+}
+
+static int rbd_journal_handle_aio_discard(struct rbd_device *rbd_dev, void **p, void *end, u8 struct_v, uint64_t commit_tid)
+{
+	uint64_t offset;
+	uint64_t length;
+	int result = 0;
+	enum obj_operation_type op_type;
+	struct rbd_img_request *img_request;
+	struct ceph_snap_context *snapc = NULL;
+
+	offset = ceph_decode_64(p);
+	length = ceph_decode_64(p);
+
+	snapc = rbd_dev->header.snapc;
+	ceph_get_snap_context(snapc);
+	op_type = OBJ_OP_DISCARD;
+
+	img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
+	if (!img_request) {
+		result = -ENOMEM;
+		goto err;
+	}
+	img_request->journaler_commit_tid = commit_tid;
+
+	result = rbd_img_fill_nodata(img_request, offset, length);
+	if (result)
+		goto err;
+
+	rbd_img_request_submit(img_request);
+	result = wait_for_completion_interruptible(&img_request->completion);
+err:
+	return result;
+}
+
+static int rbd_journal_handle_aio_write(struct rbd_device *rbd_dev, void **p, void *end, u8 struct_v, uint64_t commit_tid)
+{
+	uint64_t offset;
+	uint64_t length;
+	char *data;
+	ssize_t data_len;
+	int result = 0;
+	enum obj_operation_type op_type;
+	struct ceph_snap_context *snapc = NULL;
+	struct rbd_img_request *img_request;
+
+	struct ceph_file_extent ex;
+	struct bio_vec *bvecs = NULL;
+
+	offset = ceph_decode_64(p);
+	length = ceph_decode_64(p);
+
+	data_len = ceph_decode_32(p);
+	if (!ceph_has_room(p, end, data_len)) {
+		pr_err("our of range");
+		return -ERANGE;
+	}
+
+	data = *p;
+	*p = (char *) *p + data_len;
+
+	snapc = rbd_dev->header.snapc;
+	ceph_get_snap_context(snapc);
+	op_type = OBJ_OP_WRITE;
+
+	img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
+	if (!img_request) {
+		result = -ENOMEM;
+		goto err;
+	}
+
+	img_request->journaler_commit_tid = commit_tid;
+	snapc = NULL; /* img_request consumes a ref */
+
+	ex.fe_off = offset;
+	ex.fe_len = length;
+
+	bvecs = setup_write_bvecs(data, offset, length);
+	if (!bvecs)
+		rbd_warn(rbd_dev, "failed to alloc bvecs.");
+	result = rbd_img_fill_from_bvecs(img_request,
+				         &ex, 1, bvecs);
+	if (result)
+		goto err;
+
+	rbd_img_request_submit(img_request);
+	result = wait_for_completion_interruptible(&img_request->completion);
+err:
+	if (bvecs)
+		kfree(bvecs);
+	return result;
+}
+
+static int rbd_journal_replay(void *entry_handler, struct ceph_journaler_entry *entry, uint64_t commit_tid)
+{
+	struct rbd_device *rbd_dev = entry_handler;
+	void *data = entry->data;
+	void **p = &data;
+	void *end = *p + entry->data_len;
+	uint32_t event_type;
+	u8 struct_v;
+	u32 struct_len;
+	int ret = 0;
+
+	ret = ceph_start_decoding(p, end, 1, "rbd_decode_entry",
+				  &struct_v, &struct_len);
+	if (ret)
+		return -EINVAL;
+
+	event_type = ceph_decode_32(p);
+
+	switch (event_type) {
+	case EVENT_TYPE_AIO_WRITE:
+		rbd_journal_handle_aio_write(rbd_dev, p, end, struct_v, commit_tid);
+		break;
+	case EVENT_TYPE_AIO_DISCARD:
+		rbd_journal_handle_aio_discard(rbd_dev, p, end, struct_v, commit_tid);
+		break;
+	case EVENT_TYPE_AIO_FLUSH:
+		break;
+	default:
+		rbd_warn(rbd_dev, "unknown event_type: %u", event_type);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int rbd_journal_allocate_tag(struct rbd_journal *journal);
 static int rbd_journal_open(struct rbd_journal *journal)
 {
-- 
1.8.3.1





[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux