Re: [PATCH] RBD: Add support readv,writev for rbd

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

Thanks for this patch !

Do you see performance improvement (latencies , iops) ? (maybe do you have some fio benchmark results)

Regards,

Alexandre
 
----- Mail original -----
De: jazeltq@xxxxxxxxx
À: jdurgin@xxxxxxxxxx, "Jeff Cody" <jcody@xxxxxxxxxx>, jdillama@xxxxxxxxxx, "Kevin Wolf" <kwolf@xxxxxxxxxx>, mreitz@xxxxxxxxxx, qemu-block@xxxxxxxxxx
Cc: "qemu-devel" <qemu-devel@xxxxxxxxxx>, "ceph-devel" <ceph-devel@xxxxxxxxxxxxxxx>, "tianqing" <tianqing@xxxxxxxxxxxxxxx>
Envoyé: Samedi 5 Novembre 2016 06:17:21
Objet: [PATCH] RBD: Add support readv,writev for rbd

From: tianqing <tianqing@xxxxxxxxxxxxxxx> 

Rbd can do readv and writev directly, so wo do not need to transform 
iov to buf or vice versa any more. 

Signed-off-by: tianqing <tianqing@xxxxxxxxxxxxxxx> 
--- 
block/rbd.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 
1 file changed, 124 insertions(+) 

diff --git a/block/rbd.c b/block/rbd.c 
index a57b3e3..a405c02 100644 
--- a/block/rbd.c 
+++ b/block/rbd.c 
@@ -53,6 +53,13 @@ 
#undef LIBRBD_SUPPORTS_DISCARD 
#endif 

+/* rbd_aio_readv, rbd_aio_writev added in 0.1.11 */ 
+#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 11) 
+#define LIBRBD_SUPPORTS_IOV 
+#else 
+#undef LIBRBD_SUPPORTS_IOV 
+#endif 
+ 
#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) 

#define RBD_MAX_CONF_NAME_SIZE 128 
@@ -73,7 +80,10 @@ typedef struct RBDAIOCB { 
BlockAIOCB common; 
int64_t ret; 
QEMUIOVector *qiov; 
+#ifdef LIBRBD_SUPPORTS_IOV 
+#else 
char *bounce; 
+#endif 
RBDAIOCmd cmd; 
int error; 
struct BDRVRBDState *s; 
@@ -83,7 +93,10 @@ typedef struct RADOSCB { 
RBDAIOCB *acb; 
struct BDRVRBDState *s; 
int64_t size; 
+#ifdef LIBRBD_SUPPORTS_IOV 
+#else 
char *buf; 
+#endif 
int64_t ret; 
} RADOSCB; 

@@ -406,6 +419,48 @@ shutdown: 
return ret; 
} 

+ 
+#ifdef LIBRBD_SUPPORTS_IOV 
+/* 
+ * This aio completion is being called from rbd_finish_bh() and runs in qemu 
+ * BH context. 
+ */ 
+static void qemu_rbd_complete_aio(RADOSCB *rcb) 
+{ 
+ RBDAIOCB *acb = rcb->acb; 
+ int64_t r; 
+ 
+ r = rcb->ret; 
+ 
+ if (acb->cmd != RBD_AIO_READ) { 
+ if (r < 0) { 
+ acb->ret = r; 
+ acb->error = 1; 
+ } else if (!acb->error) { 
+ acb->ret = rcb->size; 
+ } 
+ } else { 
+ if (r < 0) { 
+ iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size); 
+ acb->ret = r; 
+ acb->error = 1; 
+ } else if (r < rcb->size) { 
+ iov_memset(acb->qiov->iov, acb->qiov->niov, 
+ rcb->size - r, 0, acb->qiov->size); 
+ if (!acb->error) { 
+ acb->ret = rcb->size; 
+ } 
+ } else if (!acb->error) { 
+ acb->ret = r; 
+ } 
+ } 
+ 
+ g_free(rcb); 
+ acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); 
+ 
+ qemu_aio_unref(acb); 
+} 
+#else 
/* 
* This aio completion is being called from rbd_finish_bh() and runs in qemu 
* BH context. 
@@ -449,6 +504,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) 

qemu_aio_unref(acb); 
} 
+#endif 

/* TODO Convert to fine grained options */ 
static QemuOptsList runtime_opts = { 
@@ -644,6 +700,73 @@ static int rbd_aio_flush_wrapper(rbd_image_t image, 
#endif 
} 

+#ifdef LIBRBD_SUPPORTS_IOV 
+static BlockAIOCB *rbd_start_aio_vec(BlockDriverState *bs, 
+ int64_t off, 
+ QEMUIOVector *qiov, 
+ int64_t size, 
+ BlockCompletionFunc *cb, 
+ void *opaque, 
+ RBDAIOCmd cmd) 
+{ 
+ RBDAIOCB *acb; 
+ RADOSCB *rcb = NULL; 
+ rbd_completion_t c; 
+ int r; 
+ 
+ BDRVRBDState *s = bs->opaque; 
+ 
+ acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque); 
+ acb->cmd = cmd; 
+ acb->qiov = qiov; 
+ assert(!qiov || qiov->size == size); 
+ 
+ acb->ret = 0; 
+ acb->error = 0; 
+ acb->s = s; 
+ 
+ rcb = g_new(RADOSCB, 1); 
+ rcb->acb = acb; 
+ rcb->s = acb->s; 
+ rcb->size = size; 
+ r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); 
+ if (r < 0) { 
+ goto failed; 
+ } 
+ 
+ switch (cmd) { 
+ case RBD_AIO_WRITE: 
+ r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); 
+ break; 
+ case RBD_AIO_READ: 
+ r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); 
+ break; 
+ case RBD_AIO_DISCARD: 
+ r = rbd_aio_discard_wrapper(s->image, off, size, c); 
+ break; 
+ case RBD_AIO_FLUSH: 
+ r = rbd_aio_flush_wrapper(s->image, c); 
+ break; 
+ default: 
+ r = -EINVAL; 
+ } 
+ 
+ if (r < 0) { 
+ goto failed_completion; 
+ } 
+ 
+ return &acb->common; 
+ 
+failed_completion: 
+ rbd_aio_release(c); 
+failed: 
+ g_free(rcb); 
+ qemu_aio_unref(acb); 
+ return NULL; 
+ 
+} 
+ 
+#else 
static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, 
int64_t off, 
QEMUIOVector *qiov, 
@@ -723,6 +846,7 @@ failed: 
qemu_aio_unref(acb); 
return NULL; 
} 
+#endif 

static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs, 
int64_t sector_num, 
-- 
2.10.2 

-- 
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in 
the body of a message to majordomo@xxxxxxxxxxxxxxx 
More majordomo info at http://vger.kernel.org/majordomo-info.html 

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux