Christoph Hellwig wrote:
> > Inspired by the vhost-net implementation, I did an initial prototype
> > of vhost-blk to see if it provides any benefits over QEMU virtio-blk.
> > I haven't handled all the error cases, fixed naming conventions, etc.,
> > but the implementation is stable enough to play with. I tried not to
> > deviate from the vhost-net implementation where possible.
> Can you also send the qemu side of it?
It's pretty hacky; I based it on an old vhost-net patch from MST for
simplicity. I haven't focused on cleaning it up, and I will rebase it on
MST's latest code once it gets into QEMU.
Thanks,
Badari
---
hw/virtio-blk.c | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 199 insertions(+)
Index: vhost/hw/virtio-blk.c
===================================================================
--- vhost.orig/hw/virtio-blk.c 2010-02-25 16:47:04.000000000 -0500
+++ vhost/hw/virtio-blk.c 2010-03-17 14:07:26.477430740 -0400
@@ -18,6 +18,7 @@
#ifdef __linux__
# include <scsi/sg.h>
#endif
+#include <kvm.h>
typedef struct VirtIOBlock
{
@@ -28,8 +29,13 @@
char serial_str[BLOCK_SERIAL_STRLEN + 1];
QEMUBH *bh;
size_t config_size;
+ uint8_t vhost_started;
} VirtIOBlock;
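+/* Hack: mirror the layout of block/raw-posix.c's private BDRVRawState so
+ * we can reach through BlockDriverState->opaque and grab the raw file
+ * descriptor to hand to vhost-blk. Only valid for raw-format images. */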
+typedef struct BDRVRawState {
+    int fd;
+} BDRVRawState;
+
static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
return (VirtIOBlock *)vdev;
@@ -501,6 +507,198 @@
return 0;
}
+#if 1
+#include "linux/vhost.h"
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include "vhost.h"
+
+int vhost_blk_fd;
+
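+/* Mirror of qemu-kvm's per-slot memory bookkeeping; we piggyback on its
+ * slots[] table (assumed to be exported by the qemu-kvm tree this patch
+ * is based on) to build the vhost memory map below. */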
+struct slot_info {
+    unsigned long phys_addr;
+    unsigned long len;
+    unsigned long userspace_addr;
+    unsigned flags;
+    int logging_count;
+};
+
+extern struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
+
+static int vhost_blk_start(struct VirtIODevice *vdev)
+{
+    target_phys_addr_t s, l, a;
+    int r, num, idx = 0;
+    struct vhost_vring_state state;
+    struct vhost_vring_file file;
+    struct vhost_vring_addr addr;
+    unsigned long long used_phys;
+    void *desc, *avail, *used;
+    int i, n = 0;
+    struct VirtQueue *q = virtio_queue(vdev, idx);
+    VirtIOBlock *vb = to_virtio_blk(vdev);
+    struct vhost_memory *mem;
+    BDRVRawState *st = vb->bs->opaque;
+
+    vhost_blk_fd = open("/dev/vhost-blk", O_RDWR);
+    if (vhost_blk_fd < 0) {
+        r = -errno;    /* capture errno before fprintf can clobber it */
+        fprintf(stderr, "unable to open vhost-blk\n");
+        return r;
+    }
+
+    r = ioctl(vhost_blk_fd, VHOST_SET_OWNER, NULL);
+    if (r < 0) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_OWNER failed\n");
+        return r;
+    }
+
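+    /* Build the vhost memory table from KVM's memory slots: count the
+     * usable slots first, then fill in one region per slot. Slots with
+     * dirty logging enabled are skipped, presumably because this
+     * prototype does not support vhost dirty logging (no migration). */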
+    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+        if (!slots[i].len ||
+            (slots[i].flags & KVM_MEM_LOG_DIRTY_PAGES)) {
+            continue;
+        }
+        ++n;
+    }
+
+    mem = qemu_mallocz(offsetof(struct vhost_memory, regions) +
+                       n * sizeof(struct vhost_memory_region));
+    if (!mem)
+        return -ENOMEM;
+
+    mem->nregions = n;
+    n = 0;
+    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+        if (!slots[i].len ||
+            (slots[i].flags & KVM_MEM_LOG_DIRTY_PAGES)) {
+            continue;
+        }
+        mem->regions[n].guest_phys_addr = slots[i].phys_addr;
+        mem->regions[n].memory_size = slots[i].len;
+        mem->regions[n].userspace_addr = slots[i].userspace_addr;
+        ++n;
+    }
+
+    r = ioctl(vhost_blk_fd, VHOST_SET_MEM_TABLE, mem);
+    if (r < 0) {
+        r = -errno;
+        qemu_free(mem);    /* the kernel keeps its own copy of the table */
+        return r;
+    }
+    qemu_free(mem);
+
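+    /* Tell vhost the ring geometry: the queue size, and the next
+     * available index it should resume processing from. */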
+    state.index = idx;
+    num = state.num = virtio_queue_get_num(vdev, idx);
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_NUM, &state);
+    if (r) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_VRING_NUM failed\n");
+        return r;
+    }
+
+    state.num = virtio_queue_last_avail_idx(vdev, idx);
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_BASE, &state);
+    if (r) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_VRING_BASE failed\n");
+        return r;
+    }
+
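+    /* The vhost worker accesses the rings through this process's address
+     * space, so map the guest-physical desc/avail/used rings into our
+     * virtual address space and hand vhost the resulting userspace
+     * pointers. */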
+    s = l = sizeof(struct vring_desc) * num;
+    a = virtio_queue_get_desc(vdev, idx);
+    desc = cpu_physical_memory_map(a, &l, 0);
+    if (!desc || l != s) {
+        r = -ENOMEM;
+        goto fail_alloc;
+    }
+    s = l = offsetof(struct vring_avail, ring) +
+        sizeof(uint16_t) * num;    /* avail ring entries are 16-bit */
+    a = virtio_queue_get_avail(vdev, idx);
+    avail = cpu_physical_memory_map(a, &l, 0);
+    if (!avail || l != s) {
+        r = -ENOMEM;
+        goto fail_alloc;
+    }
+    s = l = offsetof(struct vring_used, ring) +
+        sizeof(struct vring_used_elem) * num;
+    used_phys = a = virtio_queue_get_used(vdev, idx);
+    used = cpu_physical_memory_map(a, &l, 1);
+    if (!used || l != s) {
+        r = -ENOMEM;
+        goto fail_alloc;
+    }
+
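+    /* log_guest_addr is the guest-physical address of the used ring,
+     * which vhost would need for dirty logging; it is effectively unused
+     * here since this prototype never enables logging. */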
+    addr.index = idx;
+    addr.desc_user_addr = (uint64_t)(unsigned long)desc;
+    addr.avail_user_addr = (uint64_t)(unsigned long)avail;
+    addr.used_user_addr = (uint64_t)(unsigned long)used;
+    addr.log_guest_addr = used_phys;
+    addr.flags = 0;
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_ADDR, &addr);
+    if (r < 0) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_VRING_ADDR failed\n");
+        goto fail_alloc;
+    }
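+    /* Wire up the fast paths: the guest notifier is the irqfd vhost
+     * signals to inject a completion interrupt, and the host notifier
+     * is the ioeventfd that the guest's virtqueue kick lands on. */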
+    if (!vdev->binding->guest_notifier || !vdev->binding->host_notifier) {
+        fprintf(stderr, "binding does not support irqfd/queuefd\n");
+        r = -ENOSYS;
+        goto fail_alloc;
+    }
+    r = vdev->binding->guest_notifier(vdev->binding_opaque, idx, true);
+    if (r < 0) {
+        fprintf(stderr, "Error binding guest notifier: %d\n", -r);
+        goto fail_guest_notifier;
+    }
+
+    r = vdev->binding->host_notifier(vdev->binding_opaque, idx, true);
+    if (r < 0) {
+        fprintf(stderr, "Error binding host notifier: %d\n", -r);
+        goto fail_host_notifier;
+    }
+
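+    /* Hand the two eventfds to vhost: KICK is polled by the vhost worker
+     * for new requests, CALL is signalled by it on completion. */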
+    file.index = idx;
+    file.fd = event_notifier_get_fd(virtio_queue_host_notifier(q));
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_KICK, &file);
+    if (r) {
+        r = -errno;
+        goto fail_kick;
+    }
+
+    file.fd = event_notifier_get_fd(virtio_queue_guest_notifier(q));
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_CALL, &file);
+    if (r) {
+        r = -errno;
+        goto fail_call;
+    }
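+    /* Finally attach the backend. This prototype reuses vhost-net's
+     * VHOST_NET_SET_BACKEND ioctl number, passing the raw image file
+     * descriptor that the kernel module submits I/O to. */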
+    file.fd = st->fd;
+    r = ioctl(vhost_blk_fd, VHOST_NET_SET_BACKEND, &file);
+    if (r) {
+        r = -errno;
+        goto fail_call;
+    }
+    return 0;
+fail_call:
+fail_kick:
+    vdev->binding->host_notifier(vdev->binding_opaque, idx, false);
+fail_host_notifier:
+    vdev->binding->guest_notifier(vdev->binding_opaque, idx, false);
+fail_guest_notifier:
+fail_alloc:
+    return r;
+}
+
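+/* Start vhost the first time the guest driver reports DRIVER_OK. Note
+ * that this prototype never tears vhost down again, not even across a
+ * guest reset. */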
+static void virtio_blk_set_status(struct VirtIODevice *vdev)
+{
+    VirtIOBlock *s = to_virtio_blk(vdev);
+
+    if (s->vhost_started)
+        return;
+
+    if (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
+        int r = vhost_blk_start(vdev);
+        if (r < 0) {
+            fprintf(stderr, "unable to start vhost blk: %d\n", r);
+        } else {
+            s->vhost_started = 1;
+        }
+    }
+}
+
+#endif
+
VirtIODevice *virtio_blk_init(DeviceState *dev, DriveInfo *dinfo)
{
VirtIOBlock *s;
@@ -517,6 +715,7 @@
s->config_size = size;
s->vdev.get_config = virtio_blk_update_config;
s->vdev.get_features = virtio_blk_get_features;
+ s->vdev.set_status = virtio_blk_set_status;
s->vdev.reset = virtio_blk_reset;
s->bs = dinfo->bdrv;
s->rq = NULL;
--