when we found uncommitted events in journal, we need to do a replay. This commit only implement three kinds of events replaying: EVENT_TYPE_AIO_DISCARD: Will send a img_request to image with OBJ_OP_DISCARD, and wait for it completed. EVENT_TYPE_AIO_WRITE: Will send a img_request to image with OBJ_OP_WRITE, and wait for it completed. EVENT_TYPE_AIO_FLUSH: As all other events are replayed in synchoronized way, that means the events before are all flushed. we did nothing for this event. Signed-off-by: Dongsheng Yang <dongsheng.yang@xxxxxxxxxxxx> --- drivers/block/rbd.c | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5b641f8..6f31734 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5832,6 +5832,190 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) return ret; } +enum rbd_journal_event_type { + EVENT_TYPE_AIO_DISCARD = 0, + EVENT_TYPE_AIO_WRITE = 1, + EVENT_TYPE_AIO_FLUSH = 2, + EVENT_TYPE_OP_FINISH = 3, + EVENT_TYPE_SNAP_CREATE = 4, + EVENT_TYPE_SNAP_REMOVE = 5, + EVENT_TYPE_SNAP_RENAME = 6, + EVENT_TYPE_SNAP_PROTECT = 7, + EVENT_TYPE_SNAP_UNPROTECT = 8, + EVENT_TYPE_SNAP_ROLLBACK = 9, + EVENT_TYPE_RENAME = 10, + EVENT_TYPE_RESIZE = 11, + EVENT_TYPE_FLATTEN = 12, + EVENT_TYPE_DEMOTE_PROMOTE = 13, + EVENT_TYPE_SNAP_LIMIT = 14, + EVENT_TYPE_UPDATE_FEATURES = 15, + EVENT_TYPE_METADATA_SET = 16, + EVENT_TYPE_METADATA_REMOVE = 17, + EVENT_TYPE_AIO_WRITESAME = 18, + EVENT_TYPE_AIO_COMPARE_AND_WRITE = 19, +}; + +static struct bio_vec *setup_write_bvecs(void *buf, u64 offset, u64 length) +{ + u32 i; + struct bio_vec *bvecs = NULL; + u32 bvec_count = 0; + + bvec_count = calc_pages_for(offset, length); + bvecs = kcalloc(bvec_count, sizeof(*bvecs), GFP_NOIO); + if (!bvecs) + goto err; + + offset = offset % PAGE_SIZE; + for (i = 0; i < bvec_count; i++) { + unsigned int len = min(length, (u64)PAGE_SIZE - offset); + + bvecs[i].bv_page = alloc_page(GFP_NOIO); + if (!bvecs[i].bv_page) + goto free_bvecs; + + bvecs[i].bv_offset = offset; + bvecs[i].bv_len = len; + memcpy(page_address(bvecs[i].bv_page) + bvecs[i].bv_offset, buf, bvecs[i].bv_len); + length -= len; + buf += len; + offset = 0; + } + + rbd_assert(!length); + + return bvecs; + +free_bvecs: +err: + return NULL; +} + +static int rbd_journal_handle_aio_discard(struct rbd_device *rbd_dev, void **p, void *end, u8 struct_v, uint64_t commit_tid) +{ + uint64_t offset; + uint64_t length; + int result = 0; + enum obj_operation_type op_type; + struct rbd_img_request *img_request; + struct ceph_snap_context *snapc = NULL; + + offset = ceph_decode_64(p); + length = ceph_decode_64(p); + + snapc = rbd_dev->header.snapc; + ceph_get_snap_context(snapc); + op_type = OBJ_OP_DISCARD; + + img_request = rbd_img_request_create(rbd_dev, op_type, snapc); + if (!img_request) { + result = -ENOMEM; + goto err; + } + img_request->journaler_commit_tid = commit_tid; + + result = rbd_img_fill_nodata(img_request, offset, length); + if (result) + goto err; + + rbd_img_request_submit(img_request); + result = wait_for_completion_interruptible(&img_request->completion); +err: + return result; +} + +static int rbd_journal_handle_aio_write(struct rbd_device *rbd_dev, void **p, void *end, u8 struct_v, uint64_t commit_tid) +{ + uint64_t offset; + uint64_t length; + char *data; + ssize_t data_len; + int result = 0; + enum obj_operation_type op_type; + struct ceph_snap_context *snapc = NULL; + struct rbd_img_request *img_request; + + struct ceph_file_extent ex; + struct bio_vec *bvecs = NULL; + + offset = ceph_decode_64(p); + length = ceph_decode_64(p); + + data_len = ceph_decode_32(p); + if (!ceph_has_room(p, end, data_len)) { + pr_err("our of range"); + return -ERANGE; + } + + data = *p; + *p = (char *) *p + data_len; + + snapc = rbd_dev->header.snapc; + ceph_get_snap_context(snapc); + op_type = OBJ_OP_WRITE; + + img_request = rbd_img_request_create(rbd_dev, op_type, snapc); + if (!img_request) { + result = -ENOMEM; + goto err; + } + + img_request->journaler_commit_tid = commit_tid; + snapc = NULL; /* img_request consumes a ref */ + + ex.fe_off = offset; + ex.fe_len = length; + + bvecs = setup_write_bvecs(data, offset, length); + if (!bvecs) + rbd_warn(rbd_dev, "failed to alloc bvecs."); + result = rbd_img_fill_from_bvecs(img_request, + &ex, 1, bvecs); + if (result) + goto err; + + rbd_img_request_submit(img_request); + result = wait_for_completion_interruptible(&img_request->completion); +err: + if (bvecs) + kfree(bvecs); + return result; +} + +static int rbd_journal_replay(void *entry_handler, struct ceph_journaler_entry *entry, uint64_t commit_tid) +{ + struct rbd_device *rbd_dev = entry_handler; + void *data = entry->data; + void **p = &data; + void *end = *p + entry->data_len; + uint32_t event_type; + u8 struct_v; + u32 struct_len; + int ret = 0; + + ret = ceph_start_decoding(p, end, 1, "rbd_decode_entry", + &struct_v, &struct_len); + if (ret) + return -EINVAL; + + event_type = ceph_decode_32(p); + + switch (event_type) { + case EVENT_TYPE_AIO_WRITE: + rbd_journal_handle_aio_write(rbd_dev, p, end, struct_v, commit_tid); + break; + case EVENT_TYPE_AIO_DISCARD: + rbd_journal_handle_aio_discard(rbd_dev, p, end, struct_v, commit_tid); + break; + case EVENT_TYPE_AIO_FLUSH: + break; + default: + rbd_warn(rbd_dev, "unknown event_type: %u", event_type); + return -EINVAL; + } + return 0; +} + static int rbd_journal_allocate_tag(struct rbd_journal *journal); static int rbd_journal_open(struct rbd_journal *journal) { -- 1.8.3.1