Re: [PATCH] RBD: Add support readv,writev for rbd

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Nov 5, 2016 at 1:17 AM,  <jazeltq@xxxxxxxxx> wrote:
> From: tianqing <tianqing@xxxxxxxxxxxxxxx>
>
> Rbd can do readv and writev directly, so wo do not need to transform
> iov to buf or vice versa any more.
>
> Signed-off-by: tianqing <tianqing@xxxxxxxxxxxxxxx>
> ---
>  block/rbd.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 124 insertions(+)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index a57b3e3..a405c02 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -53,6 +53,13 @@
>  #undef LIBRBD_SUPPORTS_DISCARD
>  #endif
>
> +/* rbd_aio_readv, rbd_aio_writev added in 0.1.11 */
> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 11)
> +#define LIBRBD_SUPPORTS_IOV
> +#else
> +#undef LIBRBD_SUPPORTS_IOV
> +#endif
> +

The LIBRBD_SUPPORTS_IOVEC define is already defined by librbd.h if
it's supported.

>  #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
>
>  #define RBD_MAX_CONF_NAME_SIZE 128
> @@ -73,7 +80,10 @@ typedef struct RBDAIOCB {
>      BlockAIOCB common;
>      int64_t ret;
>      QEMUIOVector *qiov;
> +#ifdef LIBRBD_SUPPORTS_IOV
> +#else
>      char *bounce;
> +#endif

Could use "#ifndef" to avoid the empty section.

>      RBDAIOCmd cmd;
>      int error;
>      struct BDRVRBDState *s;
> @@ -83,7 +93,10 @@ typedef struct RADOSCB {
>      RBDAIOCB *acb;
>      struct BDRVRBDState *s;
>      int64_t size;
> +#ifdef LIBRBD_SUPPORTS_IOV
> +#else
>      char *buf;
> +#endif

Same comment here re: just using #ifndef

>      int64_t ret;
>  } RADOSCB;
>
> @@ -406,6 +419,48 @@ shutdown:
>      return ret;
>  }
>
> +
> +#ifdef LIBRBD_SUPPORTS_IOV
> +/*
> + * This aio completion is being called from rbd_finish_bh() and runs in qemu
> + * BH context.
> + */
> +static void qemu_rbd_complete_aio(RADOSCB *rcb)
> +{
> +    RBDAIOCB *acb = rcb->acb;
> +    int64_t r;
> +
> +    r = rcb->ret;
> +
> +    if (acb->cmd != RBD_AIO_READ) {
> +        if (r < 0) {
> +            acb->ret = r;
> +            acb->error = 1;
> +        } else if (!acb->error) {
> +            acb->ret = rcb->size;
> +        }
> +    } else {
> +        if (r < 0) {
> +            iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size);
> +            acb->ret = r;
> +            acb->error = 1;
> +        } else if (r < rcb->size) {
> +            iov_memset(acb->qiov->iov, acb->qiov->niov,
> +                       rcb->size - r, 0, acb->qiov->size);
> +            if (!acb->error) {
> +                acb->ret = rcb->size;
> +            }
> +        } else if (!acb->error) {
> +            acb->ret = r;
> +        }
> +    }
> +
> +    g_free(rcb);
> +    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
> +
> +    qemu_aio_unref(acb);
> +}

Instead of duplicating the existing qemu_rbd_complete_aio and changing
a couple lines, perhaps just #ifdef/#ifndef around the few lines that
need changes (e.g. the bounce buffer vs iov buffer).

> +#else
>  /*
>   * This aio completion is being called from rbd_finish_bh() and runs in qemu
>   * BH context.
> @@ -449,6 +504,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>
>      qemu_aio_unref(acb);
>  }
> +#endif
>
>  /* TODO Convert to fine grained options */
>  static QemuOptsList runtime_opts = {
> @@ -644,6 +700,73 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
>  #endif
>  }
>
> +#ifdef LIBRBD_SUPPORTS_IOV
> +static BlockAIOCB *rbd_start_aio_vec(BlockDriverState *bs,
> +                                 int64_t off,
> +                                 QEMUIOVector *qiov,
> +                                 int64_t size,
> +                                 BlockCompletionFunc *cb,
> +                                 void *opaque,
> +                                 RBDAIOCmd cmd)
> +{

Is this method being invoked? I would also recommend just
intermingling the bounce buffer vs iov differences into the existing
rbd_start_aio method instead of duplicating a lot of code.

> +    RBDAIOCB *acb;
> +    RADOSCB *rcb = NULL;
> +    rbd_completion_t c;
> +    int r;
> +
> +    BDRVRBDState *s = bs->opaque;
> +
> +    acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
> +    acb->cmd = cmd;
> +    acb->qiov = qiov;
> +    assert(!qiov || qiov->size == size);
> +
> +    acb->ret = 0;
> +    acb->error = 0;
> +    acb->s = s;
> +
> +    rcb = g_new(RADOSCB, 1);
> +    rcb->acb = acb;
> +    rcb->s = acb->s;
> +    rcb->size = size;
> +    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
> +    if (r < 0) {
> +        goto failed;
> +    }
> +
> +    switch (cmd) {
> +    case RBD_AIO_WRITE:
> +        r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> +        break;
> +    case RBD_AIO_READ:
> +        r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> +        break;
> +    case RBD_AIO_DISCARD:
> +        r = rbd_aio_discard_wrapper(s->image, off, size, c);
> +        break;
> +    case RBD_AIO_FLUSH:
> +        r = rbd_aio_flush_wrapper(s->image, c);
> +        break;
> +    default:
> +        r = -EINVAL;
> +    }
> +
> +    if (r < 0) {
> +        goto failed_completion;
> +    }
> +
> +    return &acb->common;
> +
> +failed_completion:
> +    rbd_aio_release(c);
> +failed:
> +    g_free(rcb);
> +    qemu_aio_unref(acb);
> +    return NULL;
> +
> +}
> +
> +#else
>  static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>                                   int64_t off,
>                                   QEMUIOVector *qiov,
> @@ -723,6 +846,7 @@ failed:
>      qemu_aio_unref(acb);
>      return NULL;
>  }
> +#endif
>
>  static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
>                                        int64_t sector_num,
> --
> 2.10.2
>

In addition to the comments above, I think this patch should wait
until the IOV API changes are merged into librbd so that these changes
can be tested.

-- 
Jason
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux