[RFC PATCH 2/2] libceph, rbd: respect REQ_NOUNMAP by setting new nounmap flag for CEPH_OSD_OP_ZERO

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch introduces the CEPH_OSD_OP_FLAG_ZERO_NOUNMAP flag for
CEPH_OSD_OP_ZERO. It marks zero requests so that the OSD does not discard
the blocks, but zeroes them instead.

Older OSD versions simply ignore the CEPH_OSD_OP_FLAG_ZERO_NOUNMAP flag and
discard blocks as before.

Signed-off-by: Roman Penyaev <rpenyaev@xxxxxxx>
Cc: Ilya Dryomov <idryomov@xxxxxxxxx>
Cc: Sage Weil <sage@xxxxxxxxxx>
Cc: Alex Elder <elder@xxxxxxxxxx>
Cc: "Yan, Zheng" <zyan@xxxxxxxxxx>
Cc: ceph-devel@xxxxxxxxxxxxxxx
---
 drivers/block/rbd.c        | 42 +++++++++++++++++++++++++++++---------
 include/linux/ceph/rados.h |  1 +
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index f45490134880..6dceb2f2cf51 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -213,6 +213,7 @@ enum obj_request_type {
 enum obj_operation_type {
 	OBJ_OP_READ = 1,
 	OBJ_OP_WRITE,
+	OBJ_OP_WRITE_ZEROES,
 	OBJ_OP_DISCARD,
 };
 
@@ -856,6 +857,8 @@ static char* obj_op_name(enum obj_operation_type op_type)
 		return "read";
 	case OBJ_OP_WRITE:
 		return "write";
+	case OBJ_OP_WRITE_ZEROES:
+		return "write_zeroes";
 	case OBJ_OP_DISCARD:
 		return "discard";
 	default:
@@ -1422,6 +1425,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req)
 	case OBJ_OP_READ:
 		return false;
 	case OBJ_OP_WRITE:
+	case OBJ_OP_WRITE_ZEROES:
 	case OBJ_OP_DISCARD:
 		return true;
 	default:
@@ -1846,13 +1850,14 @@ static int rbd_obj_setup_write(struct rbd_obj_request *obj_req)
 	return 0;
 }
 
-static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req,
-				    unsigned int which)
+static void
+__rbd_obj_setup_discard_write_zeroes(struct rbd_obj_request *obj_req,
+				     unsigned int which, bool nounmap)
 {
 	u32 flags = 0;
 	u16 opcode;
 
-	if (rbd_obj_is_entire(obj_req)) {
+	if (!nounmap && rbd_obj_is_entire(obj_req)) {
 		if (obj_req->num_img_extents) {
 			osd_req_op_init(obj_req->osd_req, which++,
 					CEPH_OSD_OP_CREATE, 0);
@@ -1862,10 +1867,11 @@ static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req,
 					CEPH_OSD_OP_DELETE, 0);
 			opcode = 0;
 		}
-	} else if (rbd_obj_is_tail(obj_req)) {
+	} else if (!nounmap && rbd_obj_is_tail(obj_req)) {
 		opcode = CEPH_OSD_OP_TRUNCATE;
 	} else {
 		opcode = CEPH_OSD_OP_ZERO;
+		flags = (nounmap ? CEPH_OSD_OP_FLAG_ZERO_NOUNMAP : 0);
 	}
 
 	if (opcode)
@@ -1877,7 +1883,8 @@ static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req,
 	rbd_osd_req_format_write(obj_req);
 }
 
-static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
+static int rbd_obj_setup_discard_write_zeroes(struct rbd_obj_request *obj_req,
+					      bool nounmap)
 {
 	unsigned int num_osd_ops, which = 0;
 	int ret;
@@ -1913,7 +1920,7 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
 			return ret;
 	}
 
-	__rbd_obj_setup_discard(obj_req, which);
+	__rbd_obj_setup_discard_write_zeroes(obj_req, which, nounmap);
 	return 0;
 }
 
@@ -1925,6 +1932,7 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
 static int __rbd_img_fill_request(struct rbd_img_request *img_req)
 {
 	struct rbd_obj_request *obj_req;
+	bool nounmap = false;
 	int ret;
 
 	for_each_obj_request(img_req, obj_req) {
@@ -1935,8 +1943,12 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
 		case OBJ_OP_WRITE:
 			ret = rbd_obj_setup_write(obj_req);
 			break;
+		case OBJ_OP_WRITE_ZEROES:
+			nounmap = true;
+			/* fall through */
 		case OBJ_OP_DISCARD:
-			ret = rbd_obj_setup_discard(obj_req);
+			ret = rbd_obj_setup_discard_write_zeroes(obj_req,
+								 nounmap);
 			break;
 		default:
 			rbd_assert(0);
@@ -2361,6 +2373,7 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
 static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
 {
 	unsigned int num_osd_ops = obj_req->osd_req->r_num_ops;
+	bool nounmap = false;
 	int ret;
 
 	dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
@@ -2398,9 +2411,12 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
 	case OBJ_OP_WRITE:
 		__rbd_obj_setup_write(obj_req, 1);
 		break;
+	case OBJ_OP_WRITE_ZEROES:
+		nounmap = true;
+		/* fall through */
 	case OBJ_OP_DISCARD:
 		rbd_assert(!rbd_obj_is_entire(obj_req));
-		__rbd_obj_setup_discard(obj_req, 1);
+		__rbd_obj_setup_discard_write_zeroes(obj_req, 1, nounmap);
 		break;
 	default:
 		rbd_assert(0);
@@ -2529,6 +2545,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req)
 		return rbd_obj_handle_read(obj_req);
 	case OBJ_OP_WRITE:
 		return rbd_obj_handle_write(obj_req);
+	case OBJ_OP_WRITE_ZEROES:
 	case OBJ_OP_DISCARD:
 		if (rbd_obj_handle_write(obj_req)) {
 			/*
@@ -3641,8 +3658,13 @@ static void rbd_queue_workfn(struct work_struct *work)
 	int result;
 
 	switch (req_op(rq)) {
-	case REQ_OP_DISCARD:
 	case REQ_OP_WRITE_ZEROES:
+		if (rq->cmd_flags & REQ_NOUNMAP) {
+			op_type = OBJ_OP_WRITE_ZEROES;
+			break;
+		}
+		/* fall through */
+	case REQ_OP_DISCARD:
 		op_type = OBJ_OP_DISCARD;
 		break;
 	case REQ_OP_WRITE:
@@ -3724,7 +3746,7 @@ static void rbd_queue_workfn(struct work_struct *work)
 	img_request->rq = rq;
 	snapc = NULL; /* img_request consumes a ref */
 
-	if (op_type == OBJ_OP_DISCARD)
+	if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE_ZEROES)
 		result = rbd_img_fill_nodata(img_request, offset, length);
 	else
 		result = rbd_img_fill_from_bio(img_request, offset, length,
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 3eb0e55665b4..e19fc5e541c3 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -418,6 +418,7 @@ enum {
 						      in the near future */
 	CEPH_OSD_OP_FLAG_FADVISE_NOCACHE    = 0x40,/* data will be accessed only
 						      once by this client */
+	CEPH_OSD_OP_FLAG_ZERO_NOUNMAP       = 0x200,/* do not discard on zeroing */
 };
 
 #define EOLDSNAPC    ERESTART  /* ORDERSNAP flag set; writer has old snapc*/
-- 
2.19.1




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux