On Sun, Jul 03, 2011 at 08:04:49PM +0300, Sasha Levin wrote: > The new flag allows passing a write side of a pipe instead of an > eventfd to be notified of writes to the specified memory region. > > Instead of signaling an event, the value written to the memory region > is written to the pipe. > > Using a pipe instead of an eventfd is usefull when any value can be > written to the memory region but we're interested in recieving the > actual value instead of just a notification. > > A simple example for practical use is the serial port. we are not > interested in an exit every time a char is written to the port, but > we do need to know what was written so we could handle it on the guest. Looking at this example, how would you handle a pipe full condition? We can't buffer unlimited amount of data in the host. > Cc: Avi Kivity <avi@xxxxxxxxxx> > Cc: Ingo Molnar <mingo@xxxxxxx> > Cc: Marcelo Tosatti <mtosatti@xxxxxxxxxx> > Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> > Cc: Pekka Enberg <penberg@xxxxxxxxxx> > Signed-off-by: Sasha Levin <levinsasha928@xxxxxxxxx> > --- > include/linux/kvm.h | 2 + > virt/kvm/eventfd.c | 65 +++++++++++++++++++++++++++++++++++--------------- > 2 files changed, 47 insertions(+), 20 deletions(-) > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index 55ef181..548f23a 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -387,12 +387,14 @@ enum { > kvm_ioeventfd_flag_nr_datamatch, > kvm_ioeventfd_flag_nr_pio, > kvm_ioeventfd_flag_nr_deassign, > + kvm_ioeventfd_flag_nr_pipe, > kvm_ioeventfd_flag_nr_max, > }; > > #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) > #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) > #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) > +#define KVM_IOEVENTFD_FLAG_PIPE (1 << kvm_ioeventfd_flag_nr_pipe) > > #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index 73358d2..434293e 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -413,10 +413,11 @@ module_exit(irqfd_module_exit); > > /* > * -------------------------------------------------------------------- > - * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal. > + * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or > + * a pipe write. > * > - * userspace can register a PIO/MMIO address with an eventfd for receiving > - * notification when the memory has been touched. > + * userspace can register a PIO/MMIO address with an eventfd or a > + * pipe for receiving notification when the memory has been touched. > * -------------------------------------------------------------------- > */ > > @@ -424,6 +425,7 @@ struct _ioeventfd { > struct list_head list; > u64 addr; > int length; > + struct file *pipe; > struct eventfd_ctx *eventfd; > u64 datamatch; > struct kvm_io_device dev; > @@ -439,7 +441,11 @@ to_ioeventfd(struct kvm_io_device *dev) > static void > ioeventfd_release(struct _ioeventfd *p) > { > - eventfd_ctx_put(p->eventfd); > + if (p->eventfd) > + eventfd_ctx_put(p->eventfd); > + else > + fput(p->pipe); > + > list_del(&p->list); > kfree(p); > } > @@ -481,6 +487,21 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) > return _val == p->datamatch ? true : false; > } > > +static ssize_t kernel_write(struct file *file, const char *buf, size_t count, > + loff_t pos) > +{ > + mm_segment_t old_fs; > + ssize_t res; > + > + old_fs = get_fs(); > + set_fs(get_ds()); > + /* The cast to a user pointer is valid due to the set_fs() */ Interesting. Is buf really always a user pointer? Why don't we tag it __user then? > + res = vfs_write(file, (const char __user *)buf, count, &pos); If pipe is non-blocking, or if we get a signal, this might fail or return a value < len. Data will be lost then, won't it? > + set_fs(old_fs); > + > + return res; > +} > + > /* MMIO/PIO writes trigger an event if the addr/val match */ > static int > ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, > @@ -491,7 +512,11 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, > if (!ioeventfd_in_range(p, addr, len, val)) > return -EOPNOTSUPP; > > - eventfd_signal(p->eventfd, 1); > + if (p->pipe) > + kernel_write(p->pipe, val, len, 0); > + else > + eventfd_signal(p->eventfd, 1); > + > return 0; > } > > @@ -533,7 +558,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; > enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; > struct _ioeventfd *p; > - struct eventfd_ctx *eventfd; > + struct eventfd_ctx *eventfd = NULL; > int ret; > > /* must be natural-word sized */ > @@ -555,9 +580,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) > return -EINVAL; > > - eventfd = eventfd_ctx_fdget(args->fd); > - if (IS_ERR(eventfd)) > - return PTR_ERR(eventfd); > + if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE)) { > + eventfd = eventfd_ctx_fdget(args->fd); > + if (IS_ERR(eventfd)) > + return PTR_ERR(eventfd); > + } > > p = kzalloc(sizeof(*p), GFP_KERNEL); > if (!p) { > @@ -568,7 +595,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > INIT_LIST_HEAD(&p->list); > p->addr = args->addr; > p->length = args->len; > - p->eventfd = eventfd; > + > + if (args->flags & KVM_IOEVENTFD_FLAG_PIPE) > + p->pipe = fget(args->fd); This really needs to check that the fd is a pipe. Otherwise you can do weird things like pass in the kvm device fd itself. > + else > + p->eventfd = eventfd; > > /* The datamatch feature is optional, otherwise this is a wildcard */ > if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH) > @@ -601,7 +632,9 @@ unlock_fail: > > fail: > kfree(p); > - eventfd_ctx_put(eventfd); > + > + if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE)) > + eventfd_ctx_put(eventfd); > > return ret; > } > @@ -612,20 +645,14 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; > enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; > struct _ioeventfd *p, *tmp; > - struct eventfd_ctx *eventfd; > int ret = -ENOENT; > > - eventfd = eventfd_ctx_fdget(args->fd); > - if (IS_ERR(eventfd)) > - return PTR_ERR(eventfd); > - > mutex_lock(&kvm->slots_lock); > > list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { > bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); > > - if (p->eventfd != eventfd || > - p->addr != args->addr || > + if (p->addr != args->addr || > p->length != args->len || > p->wildcard != wildcard) > continue; > @@ -641,8 +668,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > > mutex_unlock(&kvm->slots_lock); > > - eventfd_ctx_put(eventfd); > - Don't we need to put on deassign? > return ret; > } > > -- > 1.7.6 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html