Re: [Qemu-devel] Re: [PATCH] ceph/rbd block driver for qemu-kvm (v3)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jul 13, 2010 at 11:27:03AM -0700, Yehuda Sadeh Weinraub wrote:
> >
> > There is another problem with very large i/o requests. I suspect that
> > this can be triggered only
> > with qemu-io and not in kvm, but I'll try to get a proper solution it anyway.
> >
> 
> Have you made any progress with this issue? Just note that there were
> a few changes we introduced recently (a format change that allows
> renaming of rbd images, and some snapshots support), so everything
> will needed to be reposted once we figure out the aio issue.

Attached is a patch where I'm trying to solve the issue
with pthreads locking. It works well with qemu-io, but I'm
not sure if there are interferences with other threads in
qemu/kvm (I didn't have time to test this, yet).

Another thing I'm not sure about is the fact, that these
large I/O requests only happen with qemu-io. I've never seen
this happen inside a virtual machine. So do we really have
to fix this, as it is only a warning message (laggy).

Regards,

Christian

>From fcef3d897e0357b252a189ed59e43bfd5c24d229 Mon Sep 17 00:00:00 2001
From: Christian Brunner <chb@xxxxxx>
Date: Tue, 22 Jun 2010 21:51:09 +0200
Subject: [PATCH 27/27] add queueing delay based on queuesize

---
 block/rbd.c |   31 ++++++++++++++++++++++++++++++-
 1 files changed, 30 insertions(+), 1 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 10daf20..c6693d7 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -24,7 +24,7 @@
 #include <rados/librados.h>
 
 #include <signal.h>
-
+#include <pthread.h>
 
 int eventfd(unsigned int initval, int flags);
 
@@ -50,6 +50,7 @@ int eventfd(unsigned int initval, int flags);
  */
 
 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
+#define MAX_QUEUE_SIZE 33554432 // 32MB
 
 typedef struct RBDAIOCB {
     BlockDriverAIOCB common;
@@ -79,6 +80,9 @@ typedef struct BDRVRBDState {
     uint64_t size;
     uint64_t objsize;
     int qemu_aio_count;
+    uint64_t queuesize;
+    pthread_mutex_t *queue_mutex;
+    pthread_cond_t *queue_threshold;
 } BDRVRBDState;
 
 typedef struct rbd_obj_header_ondisk RbdHeader1;
@@ -334,6 +338,12 @@ static int rbd_open(BlockDriverState *bs, const char *filename, int flags)
     le64_to_cpus((uint64_t *) & header->image_size);
     s->size = header->image_size;
     s->objsize = 1 << header->options.order;
+    s->queuesize = 0;
+
+    s->queue_mutex = qemu_malloc(sizeof(pthread_mutex_t));
+    pthread_mutex_init(s->queue_mutex, NULL);
+    s->queue_threshold = qemu_malloc(sizeof(pthread_cond_t));
+    pthread_cond_init (s->queue_threshold, NULL);
 
     s->efd = eventfd(0, 0);
     if (s->efd < 0) {
@@ -356,6 +366,11 @@ static void rbd_close(BlockDriverState *bs)
 {
     BDRVRBDState *s = bs->opaque;
 
+    pthread_cond_destroy(s->queue_threshold);
+    qemu_free(s->queue_threshold);
+    pthread_mutex_destroy(s->queue_mutex);
+    qemu_free(s->queue_mutex);
+
     rados_close_pool(s->pool);
     rados_deinitialize();
 }
@@ -443,6 +458,12 @@ static void rbd_finish_aiocb(rados_completion_t c, RADOSCB *rcb)
     int i;
 
     acb->aiocnt--;
+    acb->s->queuesize -= rcb->segsize;
+    if (acb->s->queuesize+rcb->segsize > MAX_QUEUE_SIZE && acb->s->queuesize <= MAX_QUEUE_SIZE) {
+        pthread_mutex_lock(acb->s->queue_mutex);
+        pthread_cond_signal(acb->s->queue_threshold);
+        pthread_mutex_unlock(acb->s->queue_mutex);
+    }
     r = rados_aio_get_return_value(c);
     rados_aio_release(c);
     if (acb->write) {
@@ -560,6 +581,14 @@ static BlockDriverAIOCB *rbd_aio_rw_vector(BlockDriverState *bs,
         rcb->segsize = segsize;
         rcb->buf = buf;
 
+        while  (s->queuesize > MAX_QUEUE_SIZE) {
+            pthread_mutex_lock(s->queue_mutex);
+            pthread_cond_wait(s->queue_threshold, s->queue_mutex);
+            pthread_mutex_unlock(s->queue_mutex);
+        }
+
+        s->queuesize += segsize;
+
         if (write) {
             rados_aio_create_completion(rcb, NULL,
                                         (rados_callback_t) rbd_finish_aiocb,
-- 
1.7.0.4


[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux