This one introduces the CEPH_OSD_OP_FLAG_ZERO_NOUNMAP flag for CEPH_OSD_OP_ZERO in order to mark zero requests so that the OSD does not discard the blocks, but zeroes them instead. Old OSD versions simply ignore the CEPH_OSD_OP_FLAG_ZERO_NOUNMAP flag and discard blocks as before. Signed-off-by: Roman Penyaev <rpenyaev@xxxxxxx> Cc: Ilya Dryomov <idryomov@xxxxxxxxx> Cc: Sage Weil <sage@xxxxxxxxxx> Cc: Alex Elder <elder@xxxxxxxxxx> Cc: "Yan, Zheng" <zyan@xxxxxxxxxx> Cc: ceph-devel@xxxxxxxxxxxxxxx --- drivers/block/rbd.c | 42 +++++++++++++++++++++++++++++--------- include/linux/ceph/rados.h | 1 + 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f45490134880..6dceb2f2cf51 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -213,6 +213,7 @@ enum obj_request_type { enum obj_operation_type { OBJ_OP_READ = 1, OBJ_OP_WRITE, + OBJ_OP_WRITE_ZEROES, OBJ_OP_DISCARD, }; @@ -856,6 +857,8 @@ static char* obj_op_name(enum obj_operation_type op_type) return "read"; case OBJ_OP_WRITE: return "write"; + case OBJ_OP_WRITE_ZEROES: + return "write_zeroes"; case OBJ_OP_DISCARD: return "discard"; default: @@ -1422,6 +1425,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req) case OBJ_OP_READ: return false; case OBJ_OP_WRITE: + case OBJ_OP_WRITE_ZEROES: case OBJ_OP_DISCARD: return true; default: @@ -1846,13 +1850,14 @@ static int rbd_obj_setup_write(struct rbd_obj_request *obj_req) return 0; } -static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req, - unsigned int which) +static void +__rbd_obj_setup_discard_write_zeroes(struct rbd_obj_request *obj_req, + unsigned int which, bool nounmap) { u32 flags = 0; u16 opcode; - if (rbd_obj_is_entire(obj_req)) { + if (!nounmap && rbd_obj_is_entire(obj_req)) { if (obj_req->num_img_extents) { osd_req_op_init(obj_req->osd_req, which++, CEPH_OSD_OP_CREATE, 0); @@ -1862,10 +1867,11 @@ static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req, CEPH_OSD_OP_DELETE, 0); 
opcode = 0; } - } else if (rbd_obj_is_tail(obj_req)) { + } else if (!nounmap && rbd_obj_is_tail(obj_req)) { opcode = CEPH_OSD_OP_TRUNCATE; } else { opcode = CEPH_OSD_OP_ZERO; + flags = (nounmap ? CEPH_OSD_OP_FLAG_ZERO_NOUNMAP : 0); } if (opcode) @@ -1877,7 +1883,8 @@ static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req, rbd_osd_req_format_write(obj_req); } -static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req) +static int rbd_obj_setup_discard_write_zeroes(struct rbd_obj_request *obj_req, + bool nounmap) { unsigned int num_osd_ops, which = 0; int ret; @@ -1913,7 +1920,7 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req) return ret; } - __rbd_obj_setup_discard(obj_req, which); + __rbd_obj_setup_discard_write_zeroes(obj_req, which, nounmap); return 0; } @@ -1925,6 +1932,7 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req) static int __rbd_img_fill_request(struct rbd_img_request *img_req) { struct rbd_obj_request *obj_req; + bool nounmap = false; int ret; for_each_obj_request(img_req, obj_req) { @@ -1935,8 +1943,12 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req) case OBJ_OP_WRITE: ret = rbd_obj_setup_write(obj_req); break; + case OBJ_OP_WRITE_ZEROES: + nounmap = true; + /* fall through */ case OBJ_OP_DISCARD: - ret = rbd_obj_setup_discard(obj_req); + ret = rbd_obj_setup_discard_write_zeroes(obj_req, + nounmap); break; default: rbd_assert(0); @@ -2361,6 +2373,7 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes) static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) { unsigned int num_osd_ops = obj_req->osd_req->r_num_ops; + bool nounmap = false; int ret; dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes); @@ -2398,9 +2411,12 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) case OBJ_OP_WRITE: __rbd_obj_setup_write(obj_req, 1); break; + case OBJ_OP_WRITE_ZEROES: + nounmap = true; + /* fall through */ case 
OBJ_OP_DISCARD: rbd_assert(!rbd_obj_is_entire(obj_req)); - __rbd_obj_setup_discard(obj_req, 1); + __rbd_obj_setup_discard_write_zeroes(obj_req, 1, nounmap); break; default: rbd_assert(0); @@ -2529,6 +2545,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req) return rbd_obj_handle_read(obj_req); case OBJ_OP_WRITE: return rbd_obj_handle_write(obj_req); + case OBJ_OP_WRITE_ZEROES: case OBJ_OP_DISCARD: if (rbd_obj_handle_write(obj_req)) { /* @@ -3641,8 +3658,13 @@ static void rbd_queue_workfn(struct work_struct *work) int result; switch (req_op(rq)) { - case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: + if (rq->cmd_flags & REQ_NOUNMAP) { + op_type = OBJ_OP_WRITE_ZEROES; + break; + } + /* fall through */ + case REQ_OP_DISCARD: op_type = OBJ_OP_DISCARD; break; case REQ_OP_WRITE: @@ -3724,7 +3746,7 @@ static void rbd_queue_workfn(struct work_struct *work) img_request->rq = rq; snapc = NULL; /* img_request consumes a ref */ - if (op_type == OBJ_OP_DISCARD) + if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE_ZEROES) result = rbd_img_fill_nodata(img_request, offset, length); else result = rbd_img_fill_from_bio(img_request, offset, length, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 3eb0e55665b4..e19fc5e541c3 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -418,6 +418,7 @@ enum { in the near future */ CEPH_OSD_OP_FLAG_FADVISE_NOCACHE = 0x40,/* data will be accessed only once by this client */ + CEPH_OSD_OP_FLAG_ZERO_NOUNMAP = 0x200,/* do not discard on zeroing */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ -- 2.19.1