Re: [PATCH 5/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_SOCKET

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 07/05/2011 11:37 PM, Sasha Levin wrote:
The new flag allows passing a connected socket instead of an
eventfd to be notified of writes or reads to the specified memory region.

Instead of signaling an event, On write - the value written to the memory
region is written to the pipe.
On read - a notification of the read is sent to the host, and a response
is expected with the value to be 'read'.

Using a socket instead of an eventfd is usefull when any value can be
written to the memory region but we're interested in recieving the
actual value instead of just a notification.

A simple example for practical use is the serial port. we are not
interested in an exit every time a char is written to the port, but
we do need to know what was written so we could handle it on the guest.

I suspect a normal uart is a bad use case for this.

The THR can only hold a single value, a guest is not supposed to write to the THR again until the THRE flag in the LSR is set which indicates that the THR is empty.

In fact, when THRE is raised, the device emulation should raise an interrupt and that's what should trigger the guest to write another value to the THR.

So in practice, I don't see how it would work without relying on some specific behavior of the current Linux uart guest code.

But that said, I think this is an interesting mechanism. I'd be curious how well it worked for VGA planar access which is what the current coalesced I/O is most useful for.

Regards,

Anthony Liguori


Cc: Avi Kivity<avi@xxxxxxxxxx>
Cc: Ingo Molnar<mingo@xxxxxxx>
Cc: Marcelo Tosatti<mtosatti@xxxxxxxxxx>
Cc: Michael S. Tsirkin<mst@xxxxxxxxxx>
Cc: Pekka Enberg<penberg@xxxxxxxxxx>
Signed-off-by: Sasha Levin<levinsasha928@xxxxxxxxx>
---
  Documentation/virtual/kvm/api.txt |   18 ++++-
  include/linux/kvm.h               |    9 ++
  virt/kvm/eventfd.c                |  153 ++++++++++++++++++++++++++++++++-----
  3 files changed, 161 insertions(+), 19 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 317d86a..74f0946 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1330,7 +1330,7 @@ Returns: 0 on success, !0 on error

  This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
  within the guest.  A guest write in the registered address will signal the
-provided event instead of triggering an exit.
+provided event or write to the provided socket instead of triggering an exit.

  struct kvm_ioeventfd {
  	__u64 datamatch;
@@ -1341,6 +1341,13 @@ struct kvm_ioeventfd {
  	__u8  pad[36];
  };

+struct kvm_ioeventfd_data {
+	__u64 data;
+	__u64 addr;
+	__u32 len;
+	__u8  is_write;
+};
+
  The following flags are defined:

  #define KVM_IOEVENTFD_FLAG_DATAMATCH (1<<  kvm_ioeventfd_flag_nr_datamatch)
@@ -1348,6 +1355,7 @@ The following flags are defined:
  #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1<<  kvm_ioeventfd_flag_nr_deassign)
  #define KVM_IOEVENTFD_FLAG_READ      (1<<  kvm_ioeventfd_flag_nr_read)
  #define KVM_IOEVENTFD_FLAG_NOWRITE   (1<<  kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET    (1<<  kvm_ioeventfd_flag_nr_socket)

  If datamatch flag is set, the event will be signaled only if the written value
  to the registered address is equal to datamatch in struct kvm_ioeventfd.
@@ -1359,6 +1367,14 @@ passed in datamatch.
  If the nowrite flag is set, the event won't be signaled when the specified address
  is being written to.

+If the socket flag is set, fd is expected to be a connected AF_UNIX
+SOCK_SEQPACKET socket. Once a guest write in the registered address is
+detected - a struct kvm_ioeventfd_data which describes the write will be
+written to the socket.
+On read, struct kvm_ioeventfd_data will be written with 'is_write = 0', and
+would wait for a response with a struct kvm_ioeventfd_data containing the
+value which should be 'read' by the guest.
+

  5. The kvm_run structure

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8a12711..ff3d808 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -389,6 +389,7 @@ enum {
  	kvm_ioeventfd_flag_nr_deassign,
  	kvm_ioeventfd_flag_nr_read,
  	kvm_ioeventfd_flag_nr_nowrite,
+	kvm_ioeventfd_flag_nr_socket,
  	kvm_ioeventfd_flag_nr_max,
  };

@@ -397,6 +398,7 @@ enum {
  #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1<<  kvm_ioeventfd_flag_nr_deassign)
  #define KVM_IOEVENTFD_FLAG_READ      (1<<  kvm_ioeventfd_flag_nr_read)
  #define KVM_IOEVENTFD_FLAG_NOWRITE   (1<<  kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET    (1<<  kvm_ioeventfd_flag_nr_socket)

  #define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1<<  kvm_ioeventfd_flag_nr_max) - 1)

@@ -409,6 +411,13 @@ struct kvm_ioeventfd {
  	__u8  pad[36];
  };

+struct kvm_ioeventfd_data {
+	__u64 data;
+	__u64 addr;
+	__u32 len;
+	__u8  is_write;
+};
+
  /* for KVM_ENABLE_CAP */
  struct kvm_enable_cap {
  	/* in */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 5f2d203..d1d63b3 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -32,6 +32,7 @@
  #include<linux/eventfd.h>
  #include<linux/kernel.h>
  #include<linux/slab.h>
+#include<linux/net.h>

  #include "iodev.h"

@@ -413,10 +414,11 @@ module_exit(irqfd_module_exit);

  /*
   * --------------------------------------------------------------------
- * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
+ * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or
+ *            a socket write.
   *
- * userspace can register a PIO/MMIO address with an eventfd for receiving
- * notification when the memory has been touched.
+ * userspace can register a PIO/MMIO address with an eventfd or a
+ * socket for receiving notification when the memory has been touched.
   * --------------------------------------------------------------------
   */

@@ -424,7 +426,10 @@ struct _ioeventfd {
  	struct list_head     list;
  	u64                  addr;
  	int                  length;
-	struct eventfd_ctx  *eventfd;
+	union {
+		struct socket       *sock;
+		struct eventfd_ctx  *eventfd;
+	};
  	u64                  datamatch;
  	struct kvm_io_device dev;
  	bool                 wildcard;
@@ -441,7 +446,11 @@ to_ioeventfd(struct kvm_io_device *dev)
  static void
  ioeventfd_release(struct _ioeventfd *p)
  {
-	eventfd_ctx_put(p->eventfd);
+	if (p->eventfd)
+		eventfd_ctx_put(p->eventfd);
+	else
+		sockfd_put(p->sock);
+
  	list_del(&p->list);
  	kfree(p);
  }
@@ -510,12 +519,65 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
  	return _val == p->datamatch ? true : false;
  }

+static ssize_t socket_write(struct socket *sock, const void *buf, size_t count)
+{
+	mm_segment_t old_fs;
+	ssize_t res;
+	struct msghdr msg;
+	struct iovec iov;
+
+	iov = (struct iovec) {
+		.iov_base = (void *)buf,
+		.iov_len  = count,
+	};
+
+	msg = (struct msghdr) {
+		.msg_iov =&iov,
+		.msg_iovlen = 1,
+	};
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = sock_sendmsg(sock,&msg, count);
+	set_fs(old_fs);
+
+	return res;
+}
+
+static ssize_t socket_read(struct socket *sock, void *buf, size_t count)
+{
+	mm_segment_t old_fs;
+	ssize_t res;
+	struct msghdr msg;
+	struct iovec iov;
+
+	iov = (struct iovec) {
+		.iov_base = (void *)buf,
+		.iov_len  = count,
+	};
+
+	msg = (struct msghdr) {
+		.msg_iov =&iov,
+		.msg_iovlen = 1,
+	};
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = sock_recvmsg(sock,&msg, count, 0);
+	set_fs(old_fs);
+
+	return res;
+}
+
  /* MMIO/PIO writes trigger an event if the addr/val match */
  static int
  ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
  		const void *val)
  {
  	struct _ioeventfd *p = to_ioeventfd(this);
+	struct kvm_ioeventfd_data data;

  	/* Exit if signaling on writes isn't requested */
  	if (!p->track_writes)
@@ -524,7 +586,18 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
  	if (!ioeventfd_in_range(p, addr, len, val))
  		return -EOPNOTSUPP;

-	eventfd_signal(p->eventfd, 1);
+	data = (struct kvm_ioeventfd_data) {
+		.data = get_val(val, len),
+		.addr = addr,
+		.len = len,
+		.is_write = 1,
+	};
+
+	if (p->sock)
+		socket_write(p->sock,&data, sizeof(data));
+	else
+		eventfd_signal(p->eventfd, 1);
+
  	return 0;
  }

@@ -534,6 +607,7 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int len,
  		void *val)
  {
  	struct _ioeventfd *p = to_ioeventfd(this);
+	struct kvm_ioeventfd_data data;

  	/* Exit if signaling on reads isn't requested */
  	if (!p->track_reads)
@@ -542,7 +616,21 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int len,
  	if (!ioeventfd_in_range(p, addr, len, val))
  		return -EOPNOTSUPP;

-	eventfd_signal(p->eventfd, 1);
+	data = (struct kvm_ioeventfd_data) {
+		.addr = addr,
+		.len = len,
+		.is_write = 0,
+	};
+
+	if (p->sock) {
+		socket_write(p->sock,&data, sizeof(data));
+		socket_read(p->sock,&data, sizeof(data));
+		set_val(val, len, data.data);
+	} else {
+		set_val(val, len, p->datamatch);
+		eventfd_signal(p->eventfd, 1);
+	}
+
  	return 0;
  }

@@ -585,7 +673,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
  	int                       pio = args->flags&  KVM_IOEVENTFD_FLAG_PIO;
  	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
  	struct _ioeventfd        *p;
-	struct eventfd_ctx       *eventfd;
+	struct eventfd_ctx       *eventfd = NULL;
  	int                       ret;

  	/* check for range overflow */
@@ -596,10 +684,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
  	if (args->flags&  ~KVM_IOEVENTFD_VALID_FLAG_MASK)
  		return -EINVAL;

-	eventfd = eventfd_ctx_fdget(args->fd);
-	if (IS_ERR(eventfd))
-		return PTR_ERR(eventfd);
-
  	p = kzalloc(sizeof(*p), GFP_KERNEL);
  	if (!p) {
  		ret = -ENOMEM;
@@ -611,6 +695,20 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
  	p->length  = args->len;
  	p->eventfd = eventfd;

+	if (args->flags&  KVM_IOEVENTFD_FLAG_SOCKET) {
+		ret = 0;
+		p->sock = sockfd_lookup(args->fd,&ret);
+		if (ret)
+			goto fail;
+	} else {
+		ret = -EINVAL;
+		eventfd = eventfd_ctx_fdget(args->fd);
+		if (IS_ERR(eventfd))
+			goto fail;
+
+		p->eventfd = eventfd;
+	}
+
  	/* The datamatch feature is optional, otherwise this is a wildcard */
  	if (args->flags&  KVM_IOEVENTFD_FLAG_DATAMATCH)
  		p->datamatch = args->datamatch;
@@ -649,8 +747,14 @@ unlock_fail:
  	mutex_unlock(&kvm->slots_lock);

  fail:
+	if (eventfd)
+		eventfd_ctx_put(eventfd);
+
+	if (p->sock)
+		sockfd_put(p->sock);
+
+
  	kfree(p);
-	eventfd_ctx_put(eventfd);

  	return ret;
  }
@@ -661,12 +765,21 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
  	int                       pio = args->flags&  KVM_IOEVENTFD_FLAG_PIO;
  	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
  	struct _ioeventfd        *p, *tmp;
-	struct eventfd_ctx       *eventfd;
+	struct eventfd_ctx       *eventfd = NULL;
+	struct socket            *sock = NULL;
  	int                       ret = -ENOENT;

-	eventfd = eventfd_ctx_fdget(args->fd);
-	if (IS_ERR(eventfd))
-		return PTR_ERR(eventfd);
+	if (args->flags&  KVM_IOEVENTFD_FLAG_SOCKET) {
+		ret = 0;
+		sock = sockfd_lookup(args->fd,&ret);
+		if (ret)
+			return PTR_ERR(sock);
+	} else {
+		ret = -EINVAL;
+		eventfd = eventfd_ctx_fdget(args->fd);
+		if (IS_ERR(eventfd))
+			return PTR_ERR(eventfd);
+	}

  	mutex_lock(&kvm->slots_lock);

@@ -674,6 +787,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
  		bool wildcard = !(args->flags&  KVM_IOEVENTFD_FLAG_DATAMATCH);

  		if (p->eventfd != eventfd  ||
+		    p->sock != sock        ||
  		    p->addr != args->addr  ||
  		    p->length != args->len ||
  		    p->wildcard != wildcard)
@@ -690,7 +804,10 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)

  	mutex_unlock(&kvm->slots_lock);

-	eventfd_ctx_put(eventfd);
+	if (eventfd)
+		eventfd_ctx_put(eventfd);
+	if (sock)
+		sockfd_put(sock);

  	return ret;
  }

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux