On 04/09/2011 07:19 AM, Sasha Levin wrote:
Attempt to use mmap first for working with a disk image, if the attempt is failed (for example, large image on a 32bit system) fallback to using read/write.
I think you're likely seeing a secondary affect.
Right now, you issue synchronous I/O on the VCPU exit path. Blocking
the VCPU to do I/O not only slows down I/O (since you're only handling
one request at a time) but it has secondary affects on the guest since
normally the guest can do useful work while waiting for I/O to complete.
You really want to defer I/O to a thread pool to allow the VCPU to
continue execution while the I/O is being processed.
In QEMU, we use ioeventfd() with the virtio pci notify register which
allows us to avoid heavy weight exits in this path. We handle the
ioeventfd in a single thread and then dispatch the I/O to the thread pool.
There might be a clever way to avoid that extra context switch. It may
involve doing something like X = read(eventfd); write(eventfd, X - 1);
though which seems pretty gnarly to me.
Regards,
Anthony Liguori
Performance (kB/s) test using bonnie++ showed the following improvement:
kvm cmdline: ./kvm run --mem=256 --image=./work/vms/gentoo.img --kernel=/boot/bzImage-git
bonnie++ cmdline: bonnie++ -u 0
Before:
Version 1.96 ------Sequential Output----- --Sequential Input- -Random-
Concurrency 1 -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
Machine Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP
tux 480M 673 100 308017 61 288156 49 3286 99 892186 76 +++++ +++
Latency 12998us 50992us 35993us 3000us 1999us 201ms
Version 1.96 ------Sequential Create------ --------Random Create--------
tux -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete--
files /sec %CP /sec %CP /sec %CP /sec %CP /sec %CP /sec %CP
16 +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++
Latency 3000us 1000us 1000us 1000us 1998us 1000us
After:
Version 1.96 ------Sequential Output------ --Sequential Input- --Random-
Concurrency 1 -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
Machine Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP
tux 480M 672 99 415549 45 310350 52 3495 99 910360 83 +++++ +++
Latency 12998us 7998us 23996us 3998us 1000us 116ms
Version 1.96 ------Sequential Create------ --------Random Create--------
tux -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete--
files /sec %CP /sec %CP /sec %CP /sec %CP /sec %CP /sec %CP
16 +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++
Latency 1000us 1000us 1000us 2000us 1000us 1999us
Signed-off-by: Sasha Levin<levinsasha928@xxxxxxxxx>
---
tools/kvm/disk-image.c | 54 +++++++++++++++++++++++++++++------
tools/kvm/include/kvm/disk-image.h | 2 +-
2 files changed, 45 insertions(+), 11 deletions(-)
diff --git a/tools/kvm/disk-image.c b/tools/kvm/disk-image.c
index 9deaf45..dbae1e4 100644
--- a/tools/kvm/disk-image.c
+++ b/tools/kvm/disk-image.c
@@ -13,21 +13,31 @@
#include<unistd.h>
#include<fcntl.h>
-struct disk_image *disk_image__new(int fd, uint64_t size, struct disk_image_operations *ops)
+static int raw_image__read_sector_mmap(struct disk_image *self, uint64_t sector, void *dst, uint32_t dst_len)
{
- struct disk_image *self;
+ uint64_t offset = sector<< SECTOR_SHIFT;
- self = malloc(sizeof *self);
- if (!self)
- return NULL;
+ if (offset + dst_len> self->size)
+ return -1;
- self->fd = fd;
- self->size = size;
- self->ops = ops;
+ memcpy(dst, self->priv + offset, dst_len);
- return self;
+ return 0;
}
+static int raw_image__write_sector_mmap(struct disk_image *self, uint64_t sector, void *src, uint32_t src_len)
+{
+ uint64_t offset = sector<< SECTOR_SHIFT;
+
+ if (offset + src_len> self->size)
+ return -1;
+
+ memcpy(self->priv + offset, src, src_len);
+
+ return 0;
+}
+
+
static int raw_image__read_sector(struct disk_image *self, uint64_t sector, void *dst, uint32_t dst_len)
{
uint64_t offset = sector<< SECTOR_SHIFT;
@@ -59,6 +69,27 @@ static struct disk_image_operations raw_image_ops = {
.write_sector = raw_image__write_sector,
};
+static struct disk_image_operations raw_image_mmap_ops = {
+ .read_sector = raw_image__read_sector_mmap,
+ .write_sector = raw_image__write_sector_mmap,
+};
+
+struct disk_image *disk_image__new(int fd, uint64_t size)
+{
+ struct disk_image *self;
+
+ self = malloc(sizeof *self);
+ if (!self)
+ return NULL;
+
+ self->fd = fd;
+ self->size = size;
+ self->priv = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_NORESERVE, fd, 0);
+ self->ops = (self->priv == MAP_FAILED) ?&raw_image_ops :&raw_image_mmap_ops;
+
+ return self;
+}
+
static struct disk_image *raw_image__probe(int fd)
{
struct stat st;
@@ -66,7 +97,7 @@ static struct disk_image *raw_image__probe(int fd)
if (fstat(fd,&st)< 0)
return NULL;
- return disk_image__new(fd, st.st_size,&raw_image_ops);
+ return disk_image__new(fd, st.st_size);
}
struct disk_image *disk_image__open(const char *filename)
@@ -97,6 +128,9 @@ void disk_image__close(struct disk_image *self)
if (self->ops->close)
self->ops->close(self);
+ if (self->priv != MAP_FAILED)
+ munmap(self->priv, self->size);
+
if (close(self->fd)< 0)
warning("close() failed");
diff --git a/tools/kvm/include/kvm/disk-image.h b/tools/kvm/include/kvm/disk-image.h
index df0a15d..8b78657 100644
--- a/tools/kvm/include/kvm/disk-image.h
+++ b/tools/kvm/include/kvm/disk-image.h
@@ -22,7 +22,7 @@ struct disk_image {
};
struct disk_image *disk_image__open(const char *filename);
-struct disk_image *disk_image__new(int fd, uint64_t size, struct disk_image_operations *ops);
+struct disk_image *disk_image__new(int fd, uint64_t size);
void disk_image__close(struct disk_image *self);
static inline int disk_image__read_sector(struct disk_image *self, uint64_t sector, void *dst, uint32_t dst_len)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html