On Mon, Jul 27, 2009 at 09:28:34AM +0300, Michael S. Tsirkin wrote: > On Mon, Jul 27, 2009 at 08:33:12AM +0300, Gleb Natapov wrote: > > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote: > > > Here's an untested patch with partial support for level triggered > > > interrupts in irqfd. What this patch has: support for clearing interrupt > > > on ack. What this patch does not have: support signalling eventfd on ack > > > so that userspace can take action and e.g. reenable interrupt. > > > > > Do we want level triggered irqfd to be used only for device assignment? > > generally level triggered-not only. > But irqfd is promarily for device assignment and emulated devices. > If it is for emulated device zeroing irq level on ack is not acceptable. > > Because if not it is not appropriate to zero irq level on ack. > > The bit that is missing in this patch, is that we'll signal eventfd > after we zero irq. Userspace polls that and re-asserts irq. That is not needed for device emulation. Emulated device should lower irq line by itself. It know exactly when to do it. Please don't push broken device assignment model to emulated devices. > > > And you > > have no other way to zero irq level?! > > The reason is interrupt sharing: device can assert interrupt > but can not clear it by itself. For interrupt sharing to work you need to allocate source id for each irqfd. > qemu can still use the SET_IRQ ioctl if it wants full control. > > > > Another note is that level > > triggered irqfd shouldn't use KVM_USERSPACE_IRQ_SOURCE_ID since it may > > be shared. > > Since we zero on ack, I think this is not a problem. > We will not zero on ack for emulated devices. Please don't abuse ack notifiers. Even if you zero on ack this is still the problem BTW since other device may lower your line level. > > > Gleb, Marcelo, I'd like your input on the approach taken wrt locking. > > > Does it look sane? > > > > > > Avi, how's the interface? I intend to also add an eventfd probably in > > > the padding in the irqfd struct. > > > > > > Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > > > > > > --- > > > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > > index 230a91a..8bf16af 100644 > > > --- a/include/linux/kvm.h > > > +++ b/include/linux/kvm.h > > > @@ -488,6 +488,7 @@ struct kvm_x86_mce { > > > #endif > > > > > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > > > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) > > > > > > struct kvm_irqfd { > > > __u32 fd; > > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > > > index 99017e8..fcbf5b5 100644 > > > --- a/virt/kvm/eventfd.c > > > +++ b/virt/kvm/eventfd.c > > > @@ -45,12 +45,14 @@ struct _irqfd { > > > struct kvm *kvm; > > > struct eventfd_ctx *eventfd; > > > int gsi; > > > + int is_level; > > > struct list_head list; > > > poll_table pt; > > > wait_queue_head_t *wqh; > > > wait_queue_t wait; > > > struct work_struct inject; > > > struct work_struct shutdown; > > > + struct kvm_irq_ack_notifier kian; > > > }; > > > > > > static struct workqueue_struct *irqfd_cleanup_wq; > > > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) > > > > > > mutex_lock(&kvm->irq_lock); > > > kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > + if (!irqfd->is_level) > > > + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > mutex_unlock(&kvm->irq_lock); > > > } > > > > > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) > > > +{ > > > + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); > > > +} > > > /* > > > * Race-free decouple logic (ordering is critical) > > > */ > > > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) > > > */ > > > flush_work(&irqfd->inject); > > > > > > + if (irqfd->is_level) > > > + kvm_unregister_irq_ack_notifier(&irqfd->kian); > > > + > > > /* > > > * It is now safe to release the object's resources > > > */ > > > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, > > > } > > > > > > static int > > > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) > > > { > > > struct _irqfd *irqfd; > > > struct file *file = NULL; > > > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > irqfd->kvm = kvm; > > > irqfd->gsi = gsi; > > > + irqfd->is_level = is_level; > > > INIT_LIST_HEAD(&irqfd->list); > > > INIT_WORK(&irqfd->inject, irqfd_inject); > > > INIT_WORK(&irqfd->shutdown, irqfd_shutdown); > > > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > irqfd->eventfd = eventfd; > > > > > > + if (is_level) { > > > + irqfd->kian.gsi = gsi; > > > + irqfd->kian.irq_acked = irqfd_irq_acked; > > > + kvm_register_irq_ack_notifier(&irqfd->kian); > > > + } > > > + > > > /* > > > * Install our own custom wake-up handling so we are notified via > > > * a callback whenever someone signals the underlying eventfd > > > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) > > > int > > > kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > > > { > > > + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) > > > + return -EINVAL; > > > + > > > if (flags & KVM_IRQFD_FLAG_DEASSIGN) > > > return kvm_irqfd_deassign(kvm, fd, gsi); > > > > > > - return kvm_irqfd_assign(kvm, fd, gsi); > > > + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); > > > } > > > > > > /* > > > > -- > > Gleb. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html