vhost-blk is an in-kernel virtio-blk device accelerator. vhost-blk is similar to vhost-net. It handles virtio-blk's requests and completions on the host kernel side. How to use: ----------------------------- Load the vhost-blk.ko module on the host side and specify the vhost flag. $ lkvm run -d disk.img,vhost Performance evaluation: ----------------------------- The comparison is between kvm tool with userspace implementation and kvm tool with vhost-blk. 1) Fio with libaio ioengine on Fusion IO device With bio-based IO path, sequential read/write, random read/write IOPS boost : 8.4%, 15.3%, 10.4%, 14.6% Latency improvement: 8.5%, 15.4%, 10.4%, 15.1% 2) Fio with vsync ioengine on Fusion IO device With bio-based IO path, sequential read/write, random read/write IOPS boost : 10.5%, 4.8%, 5.2%, 5.6% Latency improvement: 11.4%, 5.0%, 5.2%, 5.8% Signed-off-by: Asias He <asias.hejun@xxxxxxxxx> --- tools/kvm/builtin-run.c | 2 + tools/kvm/disk/core.c | 2 + tools/kvm/include/kvm/disk-image.h | 2 + tools/kvm/virtio/blk.c | 134 +++++++++++++++++++++++++++++++++++- 4 files changed, 138 insertions(+), 2 deletions(-) diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c index 8e1627e..0e213bf 100644 --- a/tools/kvm/builtin-run.c +++ b/tools/kvm/builtin-run.c @@ -178,6 +178,8 @@ static int img_name_parser(const struct option *opt, const char *arg, int unset) disk_image[image_count].readonly = true; else if (strncmp(sep + 1, "direct", 6) == 0) disk_image[image_count].direct = true; + else if (strncmp(sep + 1, "vhost", 5) == 0) + disk_image[image_count].use_vhost = true; *sep = 0; cur = sep + 1; } diff --git a/tools/kvm/disk/core.c b/tools/kvm/disk/core.c index 621c940..8aa5091 100644 --- a/tools/kvm/disk/core.c +++ b/tools/kvm/disk/core.c @@ -149,6 +149,8 @@ struct disk_image **disk_image__open_all(struct disk_image_params *params, int c err = disks[i]; goto error; } + if (params[i].use_vhost) + disks[i]->use_vhost = true; } return disks; diff --git
a/tools/kvm/include/kvm/disk-image.h b/tools/kvm/include/kvm/disk-image.h index 7ae17f8..0a86515 100644 --- a/tools/kvm/include/kvm/disk-image.h +++ b/tools/kvm/include/kvm/disk-image.h @@ -41,6 +41,7 @@ struct disk_image_operations { struct disk_image_params { const char *filename; + bool use_vhost; bool readonly; bool direct; }; @@ -57,6 +58,7 @@ struct disk_image { #ifdef CONFIG_HAS_AIO io_context_t ctx; #endif + bool use_vhost; }; struct disk_image *disk_image__open(const char *filename, bool readonly, bool direct); diff --git a/tools/kvm/virtio/blk.c b/tools/kvm/virtio/blk.c index beebd24..c1e2e18 100644 --- a/tools/kvm/virtio/blk.c +++ b/tools/kvm/virtio/blk.c @@ -12,6 +12,7 @@ #include "kvm/virtio-pci.h" #include "kvm/virtio.h" +#include <linux/vhost.h> #include <linux/virtio_ring.h> #include <linux/virtio_blk.h> #include <linux/kernel.h> @@ -19,6 +20,8 @@ #include <linux/types.h> #include <pthread.h> +/* TODO: We can remove this after VHOST_BLK_SET_BACKEND goes in linux/vhost.h */ +#define VHOST_BLK_SET_BACKEND _IOW(VHOST_VIRTIO, 0x40, struct vhost_vring_file) #define VIRTIO_BLK_MAX_DEV 4 /* @@ -50,6 +53,8 @@ struct blk_dev { struct virt_queue vqs[NUM_VIRT_QUEUES]; struct blk_dev_req reqs[VIRTIO_BLK_QUEUE_SIZE]; + int vhost_fd; + pthread_t io_thread; int io_efd; @@ -166,9 +171,12 @@ static void set_guest_features(struct kvm *kvm, void *dev, u32 features) static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn) { + struct vhost_vring_state state = { .index = vq }; + struct vhost_vring_addr addr; struct blk_dev *bdev = dev; struct virt_queue *queue; void *p; + int r; compat__remove_message(compat_id); @@ -178,9 +186,83 @@ static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn) vring_init(&queue->vring, VIRTIO_BLK_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); + if (bdev->vhost_fd == 0) + return 0; + + state.num = queue->vring.num; + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_NUM, &state); + if (r < 0) + die_perror("VHOST_SET_VRING_NUM failed"); + 
state.num = 0; + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_BASE, &state); + if (r < 0) + die_perror("VHOST_SET_VRING_BASE failed"); + + addr = (struct vhost_vring_addr) { + .index = vq, + .desc_user_addr = (u64)(unsigned long)queue->vring.desc, + .avail_user_addr = (u64)(unsigned long)queue->vring.avail, + .used_user_addr = (u64)(unsigned long)queue->vring.used, + }; + + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); + if (r < 0) + die_perror("VHOST_SET_VRING_ADDR failed"); + return 0; } +static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) +{ + struct vhost_vring_file file; + struct blk_dev *bdev = dev; + struct kvm_irqfd irq; + int r; + + if (bdev->vhost_fd == 0) + return; + + irq = (struct kvm_irqfd) { + .gsi = gsi, + .fd = eventfd(0, 0), + }; + file = (struct vhost_vring_file) { + .index = vq, + .fd = irq.fd, + }; + + r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq); + if (r < 0) + die_perror("KVM_IRQFD failed"); + + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_CALL, &file); + if (r < 0) + die_perror("VHOST_SET_VRING_CALL failed"); + + file.fd = bdev->disk->fd; + r = ioctl(bdev->vhost_fd, VHOST_BLK_SET_BACKEND, &file); + if (r != 0) + die("VHOST_BLK_SET_BACKEND failed %d", errno); + +} + +static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) +{ + struct blk_dev *bdev = dev; + struct vhost_vring_file file = { + .index = vq, + .fd = efd, + }; + int r; + + if (bdev->vhost_fd == 0) + return; + + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_KICK, &file); + if (r < 0) + die_perror("VHOST_SET_VRING_KICK failed"); +} + static void *virtio_blk_thread(void *dev) { struct blk_dev *bdev = dev; @@ -230,12 +312,56 @@ static struct virtio_ops blk_dev_virtio_ops = (struct virtio_ops) { .get_host_features = get_host_features, .set_guest_features = set_guest_features, .init_vq = init_vq, - .notify_vq = notify_vq, .get_pfn_vq = get_pfn_vq, .get_size_vq = get_size_vq, .set_size_vq = set_size_vq, + .notify_vq = notify_vq, + .notify_vq_gsi = 
notify_vq_gsi, + .notify_vq_eventfd = notify_vq_eventfd, }; +static void virtio_blk_vhost_init(struct kvm *kvm, struct blk_dev *bdev) +{ + u64 features; + struct vhost_memory *mem; + int r; + + bdev->vhost_fd = open("/dev/vhost-blk", O_RDWR); + if (bdev->vhost_fd < 0) + die_perror("Failed openning vhost-blk device"); + + mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region)); + if (mem == NULL) + die("Failed allocating memory for vhost memory map"); + + mem->nregions = 1; + mem->regions[0] = (struct vhost_memory_region) { + .guest_phys_addr = 0, + .memory_size = kvm->ram_size, + .userspace_addr = (unsigned long)kvm->ram_start, + }; + + r = ioctl(bdev->vhost_fd, VHOST_SET_OWNER); + if (r != 0) + die_perror("VHOST_SET_OWNER failed"); + + r = ioctl(bdev->vhost_fd, VHOST_GET_FEATURES, &features); + if (r != 0) + die_perror("VHOST_GET_FEATURES failed"); + + r = ioctl(bdev->vhost_fd, VHOST_SET_FEATURES, &features); + if (r != 0) + die_perror("VHOST_SET_FEATURES failed"); + r = ioctl(bdev->vhost_fd, VHOST_SET_MEM_TABLE, mem); + if (r != 0) + die_perror("VHOST_SET_MEM_TABLE failed"); + + bdev->vdev.use_vhost = true; + + free(mem); +} + + static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk) { struct blk_dev *bdev; @@ -271,7 +397,11 @@ static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk) disk_image__set_callback(bdev->disk, virtio_blk_complete); - pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev); + if (disk->use_vhost) + virtio_blk_vhost_init(kvm, bdev); + else + pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev); + if (compat_id == -1) compat_id = virtio_compat_add_message("virtio-blk", "CONFIG_VIRTIO_BLK"); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html