[PATCH 5/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_SOCKET

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The new flag allows passing a connected socket instead of an
eventfd to be notified of writes or reads to the specified memory region.

Instead of signaling an event, on write the value written to the memory
region is written to the socket.
On read, a notification of the read is sent to the host, and a response
is expected with the value to be 'read'.

Using a socket instead of an eventfd is useful when any value can be
written to the memory region but we're interested in receiving the
actual value instead of just a notification.

A simple example for practical use is the serial port. We are not
interested in an exit every time a char is written to the port, but
we do need to know what was written so we can handle it on the guest.

Cc: Avi Kivity <avi@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Cc: Marcelo Tosatti <mtosatti@xxxxxxxxxx>
Cc: Michael S. Tsirkin <mst@xxxxxxxxxx>
Cc: Pekka Enberg <penberg@xxxxxxxxxx>
Signed-off-by: Sasha Levin <levinsasha928@xxxxxxxxx>
---
 Documentation/virtual/kvm/api.txt |   18 ++++-
 include/linux/kvm.h               |    9 ++
 virt/kvm/eventfd.c                |  153 ++++++++++++++++++++++++++++++++-----
 3 files changed, 161 insertions(+), 19 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 317d86a..74f0946 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1330,7 +1330,7 @@ Returns: 0 on success, !0 on error
 
 This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
 within the guest.  A guest write in the registered address will signal the
-provided event instead of triggering an exit.
+provided event or write to the provided socket instead of triggering an exit.
 
 struct kvm_ioeventfd {
 	__u64 datamatch;
@@ -1341,6 +1341,13 @@ struct kvm_ioeventfd {
 	__u8  pad[36];
 };
 
+struct kvm_ioeventfd_data {
+	__u64 data;
+	__u64 addr;
+	__u32 len;
+	__u8  is_write;
+};
+
 The following flags are defined:
 
 #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
@@ -1348,6 +1355,7 @@ The following flags are defined:
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
 #define KVM_IOEVENTFD_FLAG_READ      (1 << kvm_ioeventfd_flag_nr_read)
 #define KVM_IOEVENTFD_FLAG_NOWRITE   (1 << kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET    (1 << kvm_ioeventfd_flag_nr_socket)
 
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
@@ -1359,6 +1367,14 @@ passed in datamatch.
 If the nowrite flag is set, the event won't be signaled when the specified address
 is being written to.
 
+If the socket flag is set, fd is expected to be a connected AF_UNIX
+SOCK_SEQPACKET socket. Once a guest write in the registered address is
+detected - a struct kvm_ioeventfd_data which describes the write will be
+written to the socket.
+On read, a struct kvm_ioeventfd_data will be written with 'is_write = 0', and
+the kernel will wait for a response with a struct kvm_ioeventfd_data
+containing the value which should be 'read' by the guest.
+
 
 5. The kvm_run structure
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8a12711..ff3d808 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -389,6 +389,7 @@ enum {
 	kvm_ioeventfd_flag_nr_deassign,
 	kvm_ioeventfd_flag_nr_read,
 	kvm_ioeventfd_flag_nr_nowrite,
+	kvm_ioeventfd_flag_nr_socket,
 	kvm_ioeventfd_flag_nr_max,
 };
 
@@ -397,6 +398,7 @@ enum {
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
 #define KVM_IOEVENTFD_FLAG_READ      (1 << kvm_ioeventfd_flag_nr_read)
 #define KVM_IOEVENTFD_FLAG_NOWRITE   (1 << kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET    (1 << kvm_ioeventfd_flag_nr_socket)
 
 #define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
 
@@ -409,6 +411,13 @@ struct kvm_ioeventfd {
 	__u8  pad[36];
 };
 
+struct kvm_ioeventfd_data {
+	__u64 data;
+	__u64 addr;
+	__u32 len;
+	__u8  is_write;
+};
+
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
 	/* in */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 5f2d203..d1d63b3 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -32,6 +32,7 @@
 #include <linux/eventfd.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/net.h>
 
 #include "iodev.h"
 
@@ -413,10 +414,11 @@ module_exit(irqfd_module_exit);
 
 /*
  * --------------------------------------------------------------------
- * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
+ * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or
+ *            a socket write.
  *
- * userspace can register a PIO/MMIO address with an eventfd for receiving
- * notification when the memory has been touched.
+ * userspace can register a PIO/MMIO address with an eventfd or a
+ * socket for receiving notification when the memory has been touched.
  * --------------------------------------------------------------------
  */
 
@@ -424,7 +426,10 @@ struct _ioeventfd {
 	struct list_head     list;
 	u64                  addr;
 	int                  length;
-	struct eventfd_ctx  *eventfd;
+	union {
+		struct socket       *sock;
+		struct eventfd_ctx  *eventfd;
+	};
 	u64                  datamatch;
 	struct kvm_io_device dev;
 	bool                 wildcard;
@@ -441,7 +446,11 @@ to_ioeventfd(struct kvm_io_device *dev)
 static void
 ioeventfd_release(struct _ioeventfd *p)
 {
-	eventfd_ctx_put(p->eventfd);
+	if (p->eventfd)
+		eventfd_ctx_put(p->eventfd);
+	else
+		sockfd_put(p->sock);
+
 	list_del(&p->list);
 	kfree(p);
 }
@@ -510,12 +519,65 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
 	return _val == p->datamatch ? true : false;
 }
 
+static ssize_t socket_write(struct socket *sock, const void *buf, size_t count)
+{
+	mm_segment_t old_fs;
+	ssize_t res;
+	struct msghdr msg;
+	struct iovec iov;
+
+	iov = (struct iovec) {
+		.iov_base = (void *)buf,
+		.iov_len  = count,
+	};
+
+	msg = (struct msghdr) {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = sock_sendmsg(sock, &msg, count);
+	set_fs(old_fs);
+
+	return res;
+}
+
+static ssize_t socket_read(struct socket *sock, void *buf, size_t count)
+{
+	mm_segment_t old_fs;
+	ssize_t res;
+	struct msghdr msg;
+	struct iovec iov;
+
+	iov = (struct iovec) {
+		.iov_base = (void *)buf,
+		.iov_len  = count,
+	};
+
+	msg = (struct msghdr) {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = sock_recvmsg(sock, &msg, count, 0);
+	set_fs(old_fs);
+
+	return res;
+}
+
 /* MMIO/PIO writes trigger an event if the addr/val match */
 static int
 ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
 		const void *val)
 {
 	struct _ioeventfd *p = to_ioeventfd(this);
+	struct kvm_ioeventfd_data data;
 
 	/* Exit if signaling on writes isn't requested */
 	if (!p->track_writes)
@@ -524,7 +586,18 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
 	if (!ioeventfd_in_range(p, addr, len, val))
 		return -EOPNOTSUPP;
 
-	eventfd_signal(p->eventfd, 1);
+	data = (struct kvm_ioeventfd_data) {
+		.data = get_val(val, len),
+		.addr = addr,
+		.len = len,
+		.is_write = 1,
+	};
+
+	if (p->sock)
+		socket_write(p->sock, &data, sizeof(data));
+	else
+		eventfd_signal(p->eventfd, 1);
+
 	return 0;
 }
 
@@ -534,6 +607,7 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int len,
 		void *val)
 {
 	struct _ioeventfd *p = to_ioeventfd(this);
+	struct kvm_ioeventfd_data data;
 
 	/* Exit if signaling on reads isn't requested */
 	if (!p->track_reads)
@@ -542,7 +616,21 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int len,
 	if (!ioeventfd_in_range(p, addr, len, val))
 		return -EOPNOTSUPP;
 
-	eventfd_signal(p->eventfd, 1);
+	data = (struct kvm_ioeventfd_data) {
+		.addr = addr,
+		.len = len,
+		.is_write = 0,
+	};
+
+	if (p->sock) {
+		socket_write(p->sock, &data, sizeof(data));
+		socket_read(p->sock, &data, sizeof(data));
+		set_val(val, len, data.data);
+	} else {
+		set_val(val, len, p->datamatch);
+		eventfd_signal(p->eventfd, 1);
+	}
+
 	return 0;
 }
 
@@ -585,7 +673,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
 	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p;
-	struct eventfd_ctx       *eventfd;
+	struct eventfd_ctx       *eventfd = NULL;
 	int                       ret;
 
 	/* check for range overflow */
@@ -596,10 +684,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
 		return -EINVAL;
 
-	eventfd = eventfd_ctx_fdget(args->fd);
-	if (IS_ERR(eventfd))
-		return PTR_ERR(eventfd);
-
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p) {
 		ret = -ENOMEM;
@@ -611,6 +695,20 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	p->length  = args->len;
 	p->eventfd = eventfd;
 
+	if (args->flags & KVM_IOEVENTFD_FLAG_SOCKET) {
+		ret = 0;
+		p->sock = sockfd_lookup(args->fd, &ret);
+		if (ret)
+			goto fail;
+	} else {
+		ret = -EINVAL;
+		eventfd = eventfd_ctx_fdget(args->fd);
+		if (IS_ERR(eventfd))
+			goto fail;
+
+		p->eventfd = eventfd;
+	}
+
 	/* The datamatch feature is optional, otherwise this is a wildcard */
 	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
 		p->datamatch = args->datamatch;
@@ -649,8 +747,14 @@ unlock_fail:
 	mutex_unlock(&kvm->slots_lock);
 
 fail:
+	if (eventfd)
+		eventfd_ctx_put(eventfd);
+
+	if (p->sock)
+		sockfd_put(p->sock);
+
+
 	kfree(p);
-	eventfd_ctx_put(eventfd);
 
 	return ret;
 }
@@ -661,12 +765,21 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
 	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p, *tmp;
-	struct eventfd_ctx       *eventfd;
+	struct eventfd_ctx       *eventfd = NULL;
+	struct socket            *sock = NULL;
 	int                       ret = -ENOENT;
 
-	eventfd = eventfd_ctx_fdget(args->fd);
-	if (IS_ERR(eventfd))
-		return PTR_ERR(eventfd);
+	if (args->flags & KVM_IOEVENTFD_FLAG_SOCKET) {
+		ret = 0;
+		sock = sockfd_lookup(args->fd, &ret);
+		if (ret)
+			return PTR_ERR(sock);
+	} else {
+		ret = -EINVAL;
+		eventfd = eventfd_ctx_fdget(args->fd);
+		if (IS_ERR(eventfd))
+			return PTR_ERR(eventfd);
+	}
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -674,6 +787,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
 
 		if (p->eventfd != eventfd  ||
+		    p->sock != sock        ||
 		    p->addr != args->addr  ||
 		    p->length != args->len ||
 		    p->wildcard != wildcard)
@@ -690,7 +804,10 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	mutex_unlock(&kvm->slots_lock);
 
-	eventfd_ctx_put(eventfd);
+	if (eventfd)
+		eventfd_ctx_put(eventfd);
+	if (sock)
+		sockfd_put(sock);
 
 	return ret;
 }
-- 
1.7.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux