Re: [PATCH] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_PIPE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Jul 03, 2011 at 08:04:49PM +0300, Sasha Levin wrote:
> The new flag allows passing a write side of a pipe instead of an
> eventfd to be notified of writes to the specified memory region.
> 
> Instead of signaling an event, the value written to the memory region
> is written to the pipe.
> 
> Using a pipe instead of an eventfd is usefull when any value can be
> written to the memory region but we're interested in recieving the
> actual value instead of just a notification.
> 
> A simple example for practical use is the serial port. we are not
> interested in an exit every time a char is written to the port, but
> we do need to know what was written so we could handle it on the guest.

Looking at this example, how would you handle a pipe full condition?
We can't buffer unlimited amount of data in the host.

> Cc: Avi Kivity <avi@xxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxx>
> Cc: Marcelo Tosatti <mtosatti@xxxxxxxxxx>
> Cc: Michael S. Tsirkin <mst@xxxxxxxxxx>
> Cc: Pekka Enberg <penberg@xxxxxxxxxx>
> Signed-off-by: Sasha Levin <levinsasha928@xxxxxxxxx>
> ---
>  include/linux/kvm.h |    2 +
>  virt/kvm/eventfd.c  |   65 +++++++++++++++++++++++++++++++++++---------------
>  2 files changed, 47 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 55ef181..548f23a 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -387,12 +387,14 @@ enum {
>  	kvm_ioeventfd_flag_nr_datamatch,
>  	kvm_ioeventfd_flag_nr_pio,
>  	kvm_ioeventfd_flag_nr_deassign,
> +	kvm_ioeventfd_flag_nr_pipe,
>  	kvm_ioeventfd_flag_nr_max,
>  };
>  
>  #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
>  #define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
>  #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
> +#define KVM_IOEVENTFD_FLAG_PIPE      (1 << kvm_ioeventfd_flag_nr_pipe)
>  
>  #define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
>  
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 73358d2..434293e 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -413,10 +413,11 @@ module_exit(irqfd_module_exit);
>  
>  /*
>   * --------------------------------------------------------------------
> - * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
> + * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or
> + *            a pipe write.
>   *
> - * userspace can register a PIO/MMIO address with an eventfd for receiving
> - * notification when the memory has been touched.
> + * userspace can register a PIO/MMIO address with an eventfd or a 
> + * pipe for receiving notification when the memory has been touched.
>   * --------------------------------------------------------------------
>   */
>  
> @@ -424,6 +425,7 @@ struct _ioeventfd {
>  	struct list_head     list;
>  	u64                  addr;
>  	int                  length;
> +	struct file         *pipe;
>  	struct eventfd_ctx  *eventfd;
>  	u64                  datamatch;
>  	struct kvm_io_device dev;
> @@ -439,7 +441,11 @@ to_ioeventfd(struct kvm_io_device *dev)
>  static void
>  ioeventfd_release(struct _ioeventfd *p)
>  {
> -	eventfd_ctx_put(p->eventfd);
> +	if (p->eventfd)
> +		eventfd_ctx_put(p->eventfd);
> +	else
> +		fput(p->pipe);
> +
>  	list_del(&p->list);
>  	kfree(p);
>  }
> @@ -481,6 +487,21 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
>  	return _val == p->datamatch ? true : false;
>  }
>  
> +static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
> +			    loff_t pos)
> +{
> +	mm_segment_t old_fs;
> +	ssize_t res;
> +
> +	old_fs = get_fs();
> +	set_fs(get_ds());
> +	/* The cast to a user pointer is valid due to the set_fs() */

Interesting. Is buf really always a user pointer?
Why don't we tag it __user then?

> +	res = vfs_write(file, (const char __user *)buf, count, &pos);

If pipe is non-blocking, or if we get a signal,
this might fail or return a value < len.
Data will be lost then, won't it?

> +	set_fs(old_fs);
> +
> +	return res;
> +}
> +
>  /* MMIO/PIO writes trigger an event if the addr/val match */
>  static int
>  ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
> @@ -491,7 +512,11 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
>  	if (!ioeventfd_in_range(p, addr, len, val))
>  		return -EOPNOTSUPP;
>  
> -	eventfd_signal(p->eventfd, 1);
> +	if (p->pipe)
> +		kernel_write(p->pipe, val, len, 0);
> +	else
> +		eventfd_signal(p->eventfd, 1);
> +
>  	return 0;
>  }
>  
> @@ -533,7 +558,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
>  	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
>  	struct _ioeventfd        *p;
> -	struct eventfd_ctx       *eventfd;
> +	struct eventfd_ctx       *eventfd = NULL;
>  	int                       ret;
>  
>  	/* must be natural-word sized */
> @@ -555,9 +580,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
>  		return -EINVAL;
>  
> -	eventfd = eventfd_ctx_fdget(args->fd);
> -	if (IS_ERR(eventfd))
> -		return PTR_ERR(eventfd);
> +	if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE)) {
> +		eventfd = eventfd_ctx_fdget(args->fd);
> +		if (IS_ERR(eventfd))
> +			return PTR_ERR(eventfd);
> +	}
>  
>  	p = kzalloc(sizeof(*p), GFP_KERNEL);
>  	if (!p) {
> @@ -568,7 +595,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  	INIT_LIST_HEAD(&p->list);
>  	p->addr    = args->addr;
>  	p->length  = args->len;
> -	p->eventfd = eventfd;
> +
> +	if (args->flags & KVM_IOEVENTFD_FLAG_PIPE)
> +		p->pipe = fget(args->fd);

This really needs to check that the fd is a pipe.
Otherwise you can do weird things like pass in
the kvm device fd itself.


> +	else
> +		p->eventfd = eventfd;
>  
>  	/* The datamatch feature is optional, otherwise this is a wildcard */
>  	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
> @@ -601,7 +632,9 @@ unlock_fail:
>  
>  fail:
>  	kfree(p);
> -	eventfd_ctx_put(eventfd);
> +
> +	if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE))
> +		eventfd_ctx_put(eventfd);
>  
>  	return ret;
>  }
> @@ -612,20 +645,14 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
>  	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
>  	struct _ioeventfd        *p, *tmp;
> -	struct eventfd_ctx       *eventfd;
>  	int                       ret = -ENOENT;
>  
> -	eventfd = eventfd_ctx_fdget(args->fd);
> -	if (IS_ERR(eventfd))
> -		return PTR_ERR(eventfd);
> -
>  	mutex_lock(&kvm->slots_lock);
>  
>  	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
>  		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
>  
> -		if (p->eventfd != eventfd  ||
> -		    p->addr != args->addr  ||
> +		if (p->addr != args->addr  ||
>  		    p->length != args->len ||
>  		    p->wildcard != wildcard)
>  			continue;
> @@ -641,8 +668,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  
>  	mutex_unlock(&kvm->slots_lock);
>  
> -	eventfd_ctx_put(eventfd);
> -

Don't we need to put on deassign?

>  	return ret;
>  }
>  
> -- 
> 1.7.6
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux