Christoph Hellwig wrote:
> > Inspired by the vhost-net implementation, I did an initial prototype
> > of vhost-blk to see if it provides any benefits over QEMU virtio-blk.
> > I haven't handled all the error cases, fixed naming conventions, etc.,
> > but the implementation is stable enough to play with. I tried not to
> > deviate from the vhost-net implementation where possible.
> Can you also send the qemu side of it?
It's pretty hacky; I based it on an old vhost-net patch from MST for
simplicity. I haven't focused on cleaning it up, and I will rebase it on
MST's latest code once it gets into QEMU.
Thanks,
Badari
---
hw/virtio-blk.c | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 199 insertions(+)
Index: vhost/hw/virtio-blk.c
===================================================================
--- vhost.orig/hw/virtio-blk.c 2010-02-25 16:47:04.000000000 -0500
+++ vhost/hw/virtio-blk.c 2010-03-17 14:07:26.477430740 -0400
@@ -18,6 +18,7 @@
#ifdef __linux__
# include <scsi/sg.h>
#endif
+#include <kvm.h>
typedef struct VirtIOBlock
{
@@ -28,8 +29,13 @@
char serial_str[BLOCK_SERIAL_STRLEN + 1];
QEMUBH *bh;
size_t config_size;
+ uint8_t vhost_started;
} VirtIOBlock;
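+/* Hack: mirror the layout of block/raw-posix.c's private BDRVRawState so
+ * we can reach through BlockDriverState->opaque and grab the raw file
+ * descriptor to hand to vhost-blk. Only valid for raw-format images. */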
+typedef struct BDRVRawState {
+    int fd;
+} BDRVRawState;
+
static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
return (VirtIOBlock *)vdev;
@@ -501,6 +507,198 @@
return 0;
}
+#if 1
+#include "linux/vhost.h"
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include "vhost.h"
+
+int vhost_blk_fd;
+
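+/* Mirror of qemu-kvm's per-slot memory bookkeeping; we piggyback on its
+ * slots[] table (assumed to be exported by the qemu-kvm tree this patch
+ * is based on) to build the vhost memory map below. */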
+struct slot_info {
+    unsigned long phys_addr;
+    unsigned long len;
+    unsigned long userspace_addr;
+    unsigned flags;
+    int logging_count;
+};
+
+extern struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
+
+static int vhost_blk_start(struct VirtIODevice *vdev)
+{
+    target_phys_addr_t s, l, a;
+    int r, num, idx = 0;
+    struct vhost_vring_state state;
+    struct vhost_vring_file file;
+    struct vhost_vring_addr addr;
+    unsigned long long used_phys;
+    void *desc, *avail, *used;
+    int i, n = 0;
+    struct VirtQueue *q = virtio_queue(vdev, idx);
+    VirtIOBlock *vb = to_virtio_blk(vdev);
+    struct vhost_memory *mem;
+    BDRVRawState *st = vb->bs->opaque;
+
+    vhost_blk_fd = open("/dev/vhost-blk", O_RDWR);
+    if (vhost_blk_fd < 0) {
+        r = -errno;    /* capture errno before fprintf can clobber it */
+        fprintf(stderr, "unable to open vhost-blk\n");
+        return r;
+    }
+
+    r = ioctl(vhost_blk_fd, VHOST_SET_OWNER, NULL);
+    if (r < 0) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_OWNER failed\n");
+        return r;
+    }
+
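+    /* Build the vhost memory table from KVM's memory slots: count the
+     * usable slots first, then fill in one region per slot. Slots with
+     * dirty logging enabled are skipped, presumably because this
+     * prototype does not support vhost dirty logging (no migration). */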
+    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+        if (!slots[i].len ||
+            (slots[i].flags & KVM_MEM_LOG_DIRTY_PAGES)) {
+            continue;
+        }
+        ++n;
+    }
+
+    mem = qemu_mallocz(offsetof(struct vhost_memory, regions) +
+                       n * sizeof(struct vhost_memory_region));
+    if (!mem)
+        return -ENOMEM;
+
+    mem->nregions = n;
+    n = 0;
+    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+        if (!slots[i].len ||
+            (slots[i].flags & KVM_MEM_LOG_DIRTY_PAGES)) {
+            continue;
+        }
+        mem->regions[n].guest_phys_addr = slots[i].phys_addr;
+        mem->regions[n].memory_size = slots[i].len;
+        mem->regions[n].userspace_addr = slots[i].userspace_addr;
+        ++n;
+    }
+
+    r = ioctl(vhost_blk_fd, VHOST_SET_MEM_TABLE, mem);
+    if (r < 0) {
+        r = -errno;
+        qemu_free(mem);    /* the kernel keeps its own copy of the table */
+        return r;
+    }
+    qemu_free(mem);
+
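+    /* Tell vhost the ring geometry: the queue size, and the next
+     * available index it should resume processing from. */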
+    state.index = idx;
+    num = state.num = virtio_queue_get_num(vdev, idx);
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_NUM, &state);
+    if (r) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_VRING_NUM failed\n");
+        return r;
+    }
+
+    state.num = virtio_queue_last_avail_idx(vdev, idx);
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_BASE, &state);
+    if (r) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_VRING_BASE failed\n");
+        return r;
+    }
+
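+    /* The vhost worker accesses the rings through this process's address
+     * space, so map the guest-physical desc/avail/used rings into our
+     * virtual address space and hand vhost the resulting userspace
+     * pointers. */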
+    s = l = sizeof(struct vring_desc) * num;
+    a = virtio_queue_get_desc(vdev, idx);
+    desc = cpu_physical_memory_map(a, &l, 0);
+    if (!desc || l != s) {
+        r = -ENOMEM;
+        goto fail_alloc;
+    }
+    s = l = offsetof(struct vring_avail, ring) +
+        sizeof(uint16_t) * num;    /* avail ring entries are 16-bit */
+    a = virtio_queue_get_avail(vdev, idx);
+    avail = cpu_physical_memory_map(a, &l, 0);
+    if (!avail || l != s) {
+        r = -ENOMEM;
+        goto fail_alloc;
+    }
+    s = l = offsetof(struct vring_used, ring) +
+        sizeof(struct vring_used_elem) * num;
+    used_phys = a = virtio_queue_get_used(vdev, idx);
+    used = cpu_physical_memory_map(a, &l, 1);
+    if (!used || l != s) {
+        r = -ENOMEM;
+        goto fail_alloc;
+    }
+
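+    /* log_guest_addr is the guest-physical address of the used ring,
+     * which vhost would need for dirty logging; it is effectively unused
+     * here since this prototype never enables logging. */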
+    addr.index = idx;
+    addr.desc_user_addr = (uint64_t)(unsigned long)desc;
+    addr.avail_user_addr = (uint64_t)(unsigned long)avail;
+    addr.used_user_addr = (uint64_t)(unsigned long)used;
+    addr.log_guest_addr = used_phys;
+    addr.flags = 0;
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_ADDR, &addr);
+    if (r < 0) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_VRING_ADDR failed\n");
+        goto fail_alloc;
+    }
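+    /* Wire up the fast paths: the guest notifier is the irqfd vhost
+     * signals to inject a completion interrupt, and the host notifier
+     * is the ioeventfd that the guest's virtqueue kick lands on. */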
+    if (!vdev->binding->guest_notifier || !vdev->binding->host_notifier) {
+        fprintf(stderr, "binding does not support irqfd/queuefd\n");
+        r = -ENOSYS;
+        goto fail_alloc;
+    }
+    r = vdev->binding->guest_notifier(vdev->binding_opaque, idx, true);
+    if (r < 0) {
+        fprintf(stderr, "Error binding guest notifier: %d\n", -r);
+        goto fail_guest_notifier;
+    }
+
+    r = vdev->binding->host_notifier(vdev->binding_opaque, idx, true);
+    if (r < 0) {
+        fprintf(stderr, "Error binding host notifier: %d\n", -r);
+        goto fail_host_notifier;
+    }
+
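+    /* Hand the two eventfds to vhost: KICK is polled by the vhost worker
+     * for new requests, CALL is signalled by it on completion. */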
+    file.index = idx;
+    file.fd = event_notifier_get_fd(virtio_queue_host_notifier(q));
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_KICK, &file);
+    if (r) {
+        r = -errno;
+        goto fail_kick;
+    }
+
+    file.fd = event_notifier_get_fd(virtio_queue_guest_notifier(q));
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_CALL, &file);
+    if (r) {
+        r = -errno;
+        goto fail_call;
+    }
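+    /* Finally attach the backend. This prototype reuses vhost-net's
+     * VHOST_NET_SET_BACKEND ioctl number, passing the raw image file
+     * descriptor that the kernel module submits I/O to. */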
+    file.fd = st->fd;
+    r = ioctl(vhost_blk_fd, VHOST_NET_SET_BACKEND, &file);
+    if (r) {
+        r = -errno;
+        goto fail_call;
+    }
+    return 0;
+fail_call:
+fail_kick:
+    vdev->binding->host_notifier(vdev->binding_opaque, idx, false);
+fail_host_notifier:
+    vdev->binding->guest_notifier(vdev->binding_opaque, idx, false);
+fail_guest_notifier:
+fail_alloc:
+    return r;
+}
+
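+/* Start vhost the first time the guest driver reports DRIVER_OK. Note
+ * that this prototype never tears vhost down again, not even across a
+ * guest reset. */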
+static void virtio_blk_set_status(struct VirtIODevice *vdev)
+{
+    VirtIOBlock *s = to_virtio_blk(vdev);
+
+    if (s->vhost_started)
+        return;
+
+    if (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
+        int r = vhost_blk_start(vdev);
+        if (r < 0) {
+            fprintf(stderr, "unable to start vhost blk: %d\n", r);
+        } else {
+            s->vhost_started = 1;
+        }
+    }
+}
+
+#endif
+
VirtIODevice *virtio_blk_init(DeviceState *dev, DriveInfo *dinfo)
{
VirtIOBlock *s;
@@ -517,6 +715,7 @@
s->config_size = size;
s->vdev.get_config = virtio_blk_update_config;
s->vdev.get_features = virtio_blk_get_features;
+ s->vdev.set_status = virtio_blk_set_status;
s->vdev.reset = virtio_blk_reset;
s->bs = dinfo->bdrv;
s->rq = NULL;
--