Re: [PATCHv0 RFC] kvm: irqfd support for level interrupts

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jul 27, 2009 at 11:02:06AM +0300, Michael S. Tsirkin wrote:
> On Mon, Jul 27, 2009 at 10:31:38AM +0300, Gleb Natapov wrote:
> > On Mon, Jul 27, 2009 at 09:28:34AM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Jul 27, 2009 at 08:33:12AM +0300, Gleb Natapov wrote:
> > > > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote:
> > > > > Here's an untested patch with partial support for level triggered
> > > > > interrupts in irqfd. What this patch has: support for clearing interrupt
> > > > > on ack. What this patch does not have: support signalling eventfd on ack
> > > > > so that userspace can take action and e.g. reenable interrupt.
> > > > > 
> > > > Do we want level triggered irqfd to be used only for device assignment?
> > > 
> > > generally level triggered-not only.
> > > But irqfd is promarily for device assignment and emulated devices.
> > > 
> > If it is for emulated device zeroing irq level on ack is not acceptable.
> 
> OK, forget about emulated devices for now. Let's focus on assigned
> devices.
> 
Can't. We should design interface that works for everything.

> > > > Because if not it is not appropriate to zero irq level on ack.
> > > 
> > > The bit that is missing in this patch, is that we'll signal eventfd
> > > after we zero irq. Userspace polls that and re-asserts irq.
> > That is not needed for device emulation. Emulated device should lower
> > irq line by itself. It know exactly when to do it. Please don't push
> > broken device assignment model to emulated devices.
> > 
> > > 
> > > > And you
> > > > have no other way to zero irq level?!
> > > 
> > > The reason is interrupt sharing: device can assert interrupt
> > > but can not clear it by itself.
> > For interrupt sharing to work you need to allocate source id for each
> > irqfd.
> 
> BTW, current SET_IRQ_LINE interface uses a common source id for all
> irqs. Does this mean that interrupt sharing in guest is broken now?
No. It means that sharing in handled in userspace by chipset emulation.

> 
> > > qemu can still use the SET_IRQ ioctl if it wants full control.
> > > 
> > > 
> > > > Another note is that level
> > > > triggered irqfd shouldn't use KVM_USERSPACE_IRQ_SOURCE_ID since it may
> > > > be shared.
> > > 
> > > Since we zero on ack, I think this is not a problem.
> > > 
> > We will not zero on ack for emulated devices. Please don't abuse ack
> > notifiers. Even if you zero on ack this is still the problem BTW since
> > other device may lower your line level.
> > 
> > > > > Gleb, Marcelo, I'd like your input on the approach taken wrt locking.
> > > > > Does it look sane?
> > > > > 
> > > > > Avi, how's the interface? I intend to also add an eventfd probably in
> > > > > the padding in the irqfd struct.
> > > > > 
> > > > > Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx>
> > > > > 
> > > > > ---
> > > > > 
> > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> > > > > index 230a91a..8bf16af 100644
> > > > > --- a/include/linux/kvm.h
> > > > > +++ b/include/linux/kvm.h
> > > > > @@ -488,6 +488,7 @@ struct kvm_x86_mce {
> > > > >  #endif
> > > > >  
> > > > >  #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
> > > > > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1)
> > > > >  
> > > > >  struct kvm_irqfd {
> > > > >  	__u32 fd;
> > > > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > > > > index 99017e8..fcbf5b5 100644
> > > > > --- a/virt/kvm/eventfd.c
> > > > > +++ b/virt/kvm/eventfd.c
> > > > > @@ -45,12 +45,14 @@ struct _irqfd {
> > > > >  	struct kvm               *kvm;
> > > > >  	struct eventfd_ctx       *eventfd;
> > > > >  	int                       gsi;
> > > > > +	int                       is_level;
> > > > >  	struct list_head          list;
> > > > >  	poll_table                pt;
> > > > >  	wait_queue_head_t        *wqh;
> > > > >  	wait_queue_t              wait;
> > > > >  	struct work_struct        inject;
> > > > >  	struct work_struct        shutdown;
> > > > > +	struct kvm_irq_ack_notifier kian;
> > > > >  };
> > > > >  
> > > > >  static struct workqueue_struct *irqfd_cleanup_wq;
> > > > > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work)
> > > > >  
> > > > >  	mutex_lock(&kvm->irq_lock);
> > > > >  	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
> > > > > -	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> > > > > +	if (!irqfd->is_level)
> > > > > +		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> > > > >  	mutex_unlock(&kvm->irq_lock);
> > > > >  }
> > > > >  
> > > > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian)
> > > > > +{
> > > > > +	kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0);
> > > > > +}
> > > > >  /*
> > > > >   * Race-free decouple logic (ordering is critical)
> > > > >   */
> > > > > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work)
> > > > >  	 */
> > > > >  	flush_work(&irqfd->inject);
> > > > >  
> > > > > +	if (irqfd->is_level)
> > > > > +		kvm_unregister_irq_ack_notifier(&irqfd->kian);
> > > > > +
> > > > >  	/*
> > > > >  	 * It is now safe to release the object's resources
> > > > >  	 */
> > > > > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
> > > > >  }
> > > > >  
> > > > >  static int
> > > > > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> > > > > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level)
> > > > >  {
> > > > >  	struct _irqfd *irqfd;
> > > > >  	struct file *file = NULL;
> > > > > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> > > > >  
> > > > >  	irqfd->kvm = kvm;
> > > > >  	irqfd->gsi = gsi;
> > > > > +	irqfd->is_level = is_level;
> > > > >  	INIT_LIST_HEAD(&irqfd->list);
> > > > >  	INIT_WORK(&irqfd->inject, irqfd_inject);
> > > > >  	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
> > > > > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> > > > >  
> > > > >  	irqfd->eventfd = eventfd;
> > > > >  
> > > > > +	if (is_level) {
> > > > > +		irqfd->kian.gsi = gsi;
> > > > > +		irqfd->kian.irq_acked = irqfd_irq_acked;
> > > > > +		kvm_register_irq_ack_notifier(&irqfd->kian);
> > > > > +	}
> > > > > +
> > > > >  	/*
> > > > >  	 * Install our own custom wake-up handling so we are notified via
> > > > >  	 * a callback whenever someone signals the underlying eventfd
> > > > > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
> > > > >  int
> > > > >  kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> > > > >  {
> > > > > +	if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL))
> > > > > +		return -EINVAL;
> > > > > +
> > > > >  	if (flags & KVM_IRQFD_FLAG_DEASSIGN)
> > > > >  		return kvm_irqfd_deassign(kvm, fd, gsi);
> > > > >  
> > > > > -	return kvm_irqfd_assign(kvm, fd, gsi);
> > > > > +	return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL));
> > > > >  }
> > > > >  
> > > > >  /*
> > > > 
> > > > --
> > > > 			Gleb.
> > 
> > --
> > 			Gleb.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux