[PATCH 02/18] rbd: add support for scatterlist obj_request_type

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Mike Christie <michaelc@xxxxxxxxxxx>

This adds support for a scatterlist rbd obj_request_type, so LIO
can pass down its sg to rbd.

Signed-off-by: Mike Christie <michaelc@xxxxxxxxxxx>
---
 drivers/block/rbd.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 94 insertions(+), 11 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 60257cf..bc0466c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -44,6 +44,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/workqueue.h>
+#include <linux/scatterlist.h>
 
 #include "rbd_types.h"
 
@@ -208,7 +209,7 @@ struct rbd_obj_request;
 typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
 
 enum obj_request_type {
-	OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
+	OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES, OBJ_REQUEST_SG,
 };
 
 enum obj_operation_type {
@@ -264,6 +265,10 @@ struct rbd_obj_request {
 			struct page	**pages;
 			u32		page_count;
 		};
+		struct {
+			struct scatterlist	*sg;
+			unsigned int		init_sg_offset;
+		};
 	};
 	struct page		**copyup_pages;
 	u32			copyup_page_count;
@@ -295,16 +300,22 @@ struct rbd_img_request {
 		u64			snap_id;	/* for reads */
 		struct ceph_snap_context *snapc;	/* for writes */
 	};
-	union {
-		struct request		*rq;		/* block request */
-		struct rbd_obj_request	*obj_request;	/* obj req initiator */
-	};
+
+	struct request		*rq;		/* block request */
+	struct rbd_obj_request	*obj_request;	/* obj req initiator */
+
 	struct page		**copyup_pages;
 	u32			copyup_page_count;
 	spinlock_t		completion_lock;/* protects next_completion */
 	u32			next_completion;
 	rbd_img_callback_t	callback;
+	/*
+	 * xferred is the bytes that have successfully been transferred.
+	 * completed is the bytes that have been accounted for and includes
+	 * both failed and successfully transffered bytes.
+	 */
 	u64			xferred;/* aggregate bytes transferred */
+	u64			completed;
 	int			result;	/* first nonzero obj_request result */
 
 	u32			obj_request_count;
@@ -1273,6 +1284,34 @@ static void zero_bio_chain(struct bio *chain, int start_ofs)
 	}
 }
 
+static void zero_sg(struct scatterlist *sgl, u64 start, u64 length)
+{
+	struct scatterlist *sg = sgl;
+	u64 end = start + length;
+	u64 pos = 0;
+
+	while (pos < end && sg) {
+		if (pos + sg->length > start) {
+			int sg_offset = max_t(int, start - pos, 0);
+			unsigned int length = min_t(unsigned int,
+						    sg->length - sg_offset,
+						    end - pos);
+			void *kaddr;
+			unsigned long flags;
+
+			local_irq_save(flags);
+			kaddr = kmap_atomic(sg_page(sg));
+			memset(kaddr + sg_offset + sg->offset, 0, length);
+			flush_dcache_page(sg_page(sg));
+			kunmap_atomic(kaddr);
+			local_irq_restore(flags);
+		}
+
+		pos += sg->length;
+		sg = sg_next(sg);
+	}
+}
+
 /*
  * similar to zero_bio_chain(), zeros data defined by a page array,
  * starting at the given byte offset from the start of the array and
@@ -1547,6 +1586,7 @@ static bool obj_request_type_valid(enum obj_request_type type)
 	case OBJ_REQUEST_NODATA:
 	case OBJ_REQUEST_BIO:
 	case OBJ_REQUEST_PAGES:
+	case OBJ_REQUEST_SG:
 		return true;
 	default:
 		return false;
@@ -1730,14 +1770,18 @@ rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
 	if (obj_request->result == -ENOENT) {
 		if (obj_request->type == OBJ_REQUEST_BIO)
 			zero_bio_chain(obj_request->bio_list, 0);
-		else
+		else if (obj_request->type == OBJ_REQUEST_PAGES)
 			zero_pages(obj_request->pages, 0, length);
+		else if (obj_request->type == OBJ_REQUEST_SG)
+			zero_sg(obj_request->sg, 0, length);
 		obj_request->result = 0;
 	} else if (xferred < length && !obj_request->result) {
 		if (obj_request->type == OBJ_REQUEST_BIO)
 			zero_bio_chain(obj_request->bio_list, xferred);
-		else
+		else if (obj_request->type == OBJ_REQUEST_PAGES)
 			zero_pages(obj_request->pages, xferred, length);
+		else if (obj_request->type == OBJ_REQUEST_SG)
+			zero_sg(obj_request->sg, xferred, length);
 	}
 	obj_request->xferred = length;
 	obj_request_done_set(obj_request);
@@ -2067,6 +2111,7 @@ static void rbd_obj_request_destroy(struct kref *kref)
 	rbd_assert(obj_request_type_valid(obj_request->type));
 	switch (obj_request->type) {
 	case OBJ_REQUEST_NODATA:
+	case OBJ_REQUEST_SG:
 		break;		/* Nothing to do */
 	case OBJ_REQUEST_BIO:
 		if (obj_request->bio_list)
@@ -2168,6 +2213,7 @@ static struct rbd_img_request *rbd_img_request_create(
 	img_request->offset = offset;
 	img_request->length = length;
 	img_request->flags = 0;
+	img_request->completed = 0;
 	if (op_type == OBJ_OP_DISCARD) {
 		img_request_discard_set(img_request);
 		img_request->snapc = snapc;
@@ -2293,6 +2339,7 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 		 */
 		xferred = obj_request->length;
 	}
+	img_request->completed += xferred;
 
 	/* Image object requests don't own their page array */
 
@@ -2304,12 +2351,15 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 	if (img_request_child_test(img_request)) {
 		rbd_assert(img_request->obj_request != NULL);
 		more = obj_request->which < img_request->obj_request_count - 1;
-	} else {
-		rbd_assert(img_request->rq != NULL);
-
+	} else if (img_request->rq) {
 		more = blk_update_request(img_request->rq, result, xferred);
 		if (!more)
 			__blk_mq_end_request(img_request->rq, result);
+	} else {
+		if (img_request->completed < img_request->length)
+			more = true;
+		else
+			more = false;
 	}
 
 	return more;
@@ -2411,6 +2461,10 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
 		osd_req_op_extent_osd_data_pages(osd_request, num_ops,
 					obj_request->pages, length,
 					offset & ~PAGE_MASK, false, false);
+	else if (obj_request->type == OBJ_REQUEST_SG)
+		osd_req_op_extent_osd_data_sg(osd_request, num_ops,
+					obj_request->sg,
+					obj_request->init_sg_offset, length);
 
 	/* Discards are also writes */
 	if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
@@ -2436,7 +2490,9 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
 	struct rbd_obj_request *next_obj_request;
 	struct bio *bio_list = NULL;
 	unsigned int bio_offset = 0;
+	unsigned int sg_offset = 0;
 	struct page **pages = NULL;
+	struct scatterlist *sgl = NULL;
 	enum obj_operation_type op_type;
 	u64 img_offset;
 	u64 resid;
@@ -2455,6 +2511,8 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
 			   bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
 	} else if (type == OBJ_REQUEST_PAGES) {
 		pages = data_desc;
+	} else if (type == OBJ_REQUEST_SG) {
+		sgl = data_desc;
 	}
 
 	while (resid) {
@@ -2502,6 +2560,27 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
 			if ((offset + length) & ~PAGE_MASK)
 				page_count--;	/* more on last page */
 			pages += page_count;
+		} else if (type == OBJ_REQUEST_SG) {
+			u64 sg_length = 0;
+
+			obj_request->init_sg_offset = sg_offset;
+			obj_request->sg = sgl;
+			do {
+				sg_length += (sgl->length - sg_offset);
+				sg_offset = 0;
+				if (sg_length > length) {
+					sg_offset = sgl->length -
+							(sg_length - length);
+					break;
+				}
+				/*
+				 * For WRITE_SAME we have a single sg that
+				 * is written possibly multiple times over
+				 * img_request->length bytes.
+				 */
+				if (sg_next(sgl))
+					sgl = sg_next(sgl);
+			} while (true);
 		}
 
 		osd_req = rbd_osd_req_create(rbd_dev, op_type,
@@ -3058,9 +3137,13 @@ static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
 	if (obj_request->type == OBJ_REQUEST_BIO)
 		result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
 						obj_request->bio_list);
-	else
+	else if (obj_request->type == OBJ_REQUEST_PAGES)
 		result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
 						obj_request->pages);
+	else
+		result = rbd_img_request_fill(img_request, OBJ_REQUEST_SG,
+						obj_request->sg);
+
 	if (result)
 		goto out_err;
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux SCSI]     [Kernel Newbies]     [Linux SCSI Target Infrastructure]     [Share Photos]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Device Mapper]

  Powered by Linux