Re: [RFC PATCH] kvm: Extend irqfd to support level interrupts

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2012-06-16 18:34, Alex Williamson wrote:
> I'm looking for opinions on this approach.  For vfio device assignment
> we minimally need a way to get EOIs from the in-kernel irqchip out to
> userspace.  Getting that out via an eventfd would allow us to bounce
> all level interrupts out to userspace, where we would de-assert the
> device interrupt in qemu and unmask the physical device.  Ideally we
> could deassert the interrupt in KVM, which allows us to send the EOI
> directly to vfio.  To do that, we need to use a new IRQ source ID so
> the guest sees the logical OR of qemu requested state and external
> device state.  This allows external devices to share interrupts with
> emulated devices, just like KVM assignment.  That means the means we
> also need to use a new source ID when injecting the interrupt via
> irqfd.
> 
> Rather than creating a source ID allocation interface, extending irqfd
> to support a user supplied source ID, and creating another new
> interface to get the EOI out, I think it works out better to bundle
> these all together as just a level irqfd interface.  This way we don't
> allow users to create unbalanced states where a level interrupt is
> asserted, but has no way of being deasserted.  I believe the below
> does this, but needs testing and validation with an implementation in
> qemu.
> 
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> ---
> 
>  arch/x86/kvm/x86.c       |    1 +
>  include/linux/kvm.h      |    6 +++
>  include/linux/kvm_host.h |    4 +-
>  virt/kvm/eventfd.c       |   90 ++++++++++++++++++++++++++++++++++++++--------
>  virt/kvm/kvm_main.c      |    2 +
>  5 files changed, 84 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a01a424..80bed07 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2148,6 +2148,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_GET_TSC_KHZ:
>  	case KVM_CAP_PCI_2_3:
>  	case KVM_CAP_KVMCLOCK_CTRL:
> +	case KVM_CAP_IRQFD_LEVEL:
>  		r = 1;
>  		break;
>  	case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 2ce09aa..cca49a1 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_PPC_GET_SMMU_INFO 78
>  #define KVM_CAP_S390_COW 79
>  #define KVM_CAP_PPC_ALLOC_HTAB 80
> +#define KVM_CAP_IRQFD_LEVEL 81
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> @@ -683,12 +684,15 @@ struct kvm_xen_hvm_config {
>  #endif
>  
>  #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
> +/* Available with KVM_CAP_IRQFD_LEVEL */
> +#define KVM_IRQFD_FLAG_LEVEL (1 << 1)
>  
>  struct kvm_irqfd {
>  	__u32 fd;
>  	__u32 gsi;
>  	__u32 flags;
> -	__u8  pad[20];
> +	__u32 eoi_fd;
> +	__u8 pad[16];
>  };
>  
>  struct kvm_clock_data {
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 27ac8a4..ae3b426 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -824,7 +824,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
>  #ifdef CONFIG_HAVE_KVM_EVENTFD
>  
>  void kvm_eventfd_init(struct kvm *kvm);
> -int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
> +int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
>  void kvm_irqfd_release(struct kvm *kvm);
>  void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
>  int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
> @@ -833,7 +833,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
>  
>  static inline void kvm_eventfd_init(struct kvm *kvm) {}
>  
> -static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> +static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
>  {
>  	return -EINVAL;
>  }
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index f59c1e8..89a6fa9 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -49,9 +49,13 @@ struct _irqfd {
>  	wait_queue_t wait;
>  	/* Update side is protected by irqfds.lock */
>  	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
> -	/* Used for level IRQ fast-path */
> +	/* Used for IRQ fast-path */
>  	int gsi;
>  	struct work_struct inject;
> +	/* Used for level EOI path */
> +	int irq_source_id;
> +	struct eventfd_ctx *eoi_eventfd;
> +	struct kvm_irq_ack_notifier notifier;
>  	/* Used for setup/shutdown */
>  	struct eventfd_ctx *eventfd;
>  	struct list_head list;
> @@ -62,7 +66,7 @@ struct _irqfd {
>  static struct workqueue_struct *irqfd_cleanup_wq;
>  
>  static void
> -irqfd_inject(struct work_struct *work)
> +irqfd_inject_edge(struct work_struct *work)
>  {
>  	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
>  	struct kvm *kvm = irqfd->kvm;
> @@ -71,6 +75,23 @@ irqfd_inject(struct work_struct *work)
>  	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
>  }
>  
> +static void
> +irqfd_inject_level(struct work_struct *work)
> +{
> +	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
> +
> +	kvm_set_irq(irqfd->kvm, irqfd->irq_source_id, irqfd->gsi, 1);
> +}
> +
> +static void
> +irqfd_ack_level(struct kvm_irq_ack_notifier *notifier)
> +{
> +	struct _irqfd *irqfd  = container_of(notifier, struct _irqfd, notifier);
> +
> +	kvm_set_irq(irqfd->kvm, irqfd->irq_source_id, irqfd->gsi, 0);
> +	eventfd_signal(irqfd->eoi_eventfd, 1);
> +}
> +
>  /*
>   * Race-free decouple logic (ordering is critical)
>   */
> @@ -96,6 +117,14 @@ irqfd_shutdown(struct work_struct *work)
>  	 * It is now safe to release the object's resources
>  	 */
>  	eventfd_ctx_put(irqfd->eventfd);
> +
> +	if (irqfd->eoi_eventfd) {
> +		kvm_unregister_irq_ack_notifier(irqfd->kvm, &irqfd->notifier);
> +		eventfd_ctx_put(irqfd->eoi_eventfd);
> +		kvm_set_irq(irqfd->kvm, irqfd->irq_source_id, irqfd->gsi, 0);
> +		kvm_free_irq_source_id(irqfd->kvm, irqfd->irq_source_id);
> +	}
> +
>  	kfree(irqfd);
>  }
>  
> @@ -198,13 +227,13 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
>  }
>  
>  static int
> -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> +kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>  {
>  	struct kvm_irq_routing_table *irq_rt;
>  	struct _irqfd *irqfd, *tmp;
>  	struct file *file = NULL;
> -	struct eventfd_ctx *eventfd = NULL;
> -	int ret;
> +	struct eventfd_ctx *eventfd = NULL, *eoi_eventfd = NULL;
> +	int ret, irq_source_id = -1;
>  	unsigned int events;
>  
>  	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
> @@ -212,12 +241,35 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
>  		return -ENOMEM;
>  
>  	irqfd->kvm = kvm;
> -	irqfd->gsi = gsi;
> +	irqfd->gsi = args->gsi;
>  	INIT_LIST_HEAD(&irqfd->list);
> -	INIT_WORK(&irqfd->inject, irqfd_inject);
> +
> +	if (args->flags & KVM_IRQFD_FLAG_LEVEL) {
> +		irq_source_id = kvm_request_irq_source_id(kvm);
> +		if (irq_source_id < 0) {
> +			ret = irq_source_id;
> +			goto fail;
> +		}
> +
> +		eoi_eventfd = eventfd_ctx_fdget(args->eoi_fd);
> +		if (IS_ERR(eoi_eventfd)) {
> +			ret = PTR_ERR(eoi_eventfd);
> +			goto fail;
> +		}
> +
> +		irqfd->irq_source_id = irq_source_id;
> +		irqfd->eoi_eventfd = eoi_eventfd;
> +		irqfd->notifier.gsi = args->gsi;
> +		irqfd->notifier.irq_acked = irqfd_ack_level;
> +		kvm_register_irq_ack_notifier(kvm, &irqfd->notifier);
> +
> +		INIT_WORK(&irqfd->inject, irqfd_inject_level);
> +	} else
> +		INIT_WORK(&irqfd->inject, irqfd_inject_edge);
> +
>  	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
>  
> -	file = eventfd_fget(fd);
> +	file = eventfd_fget(args->fd);
>  	if (IS_ERR(file)) {
>  		ret = PTR_ERR(file);
>  		goto fail;
> @@ -282,6 +334,14 @@ fail:
>  	if (!IS_ERR(file))
>  		fput(file);
>  
> +	if (eoi_eventfd && !IS_ERR(eoi_eventfd)) {
> +		kvm_unregister_irq_ack_notifier(kvm, &irqfd->notifier);
> +		eventfd_ctx_put(eoi_eventfd);
> +	}
> +
> +	if (irq_source_id >= 0)
> +		kvm_free_irq_source_id(kvm, irq_source_id);
> +
>  	kfree(irqfd);
>  	return ret;
>  }
> @@ -298,19 +358,19 @@ kvm_eventfd_init(struct kvm *kvm)
>   * shutdown any irqfd's that match fd+gsi
>   */
>  static int
> -kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
> +kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
>  {
>  	struct _irqfd *irqfd, *tmp;
>  	struct eventfd_ctx *eventfd;
>  
> -	eventfd = eventfd_ctx_fdget(fd);
> +	eventfd = eventfd_ctx_fdget(args->fd);
>  	if (IS_ERR(eventfd))
>  		return PTR_ERR(eventfd);
>  
>  	spin_lock_irq(&kvm->irqfds.lock);
>  
>  	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
> -		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
> +		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
>  			/*
>  			 * This rcu_assign_pointer is needed for when
>  			 * another thread calls kvm_irq_routing_update before
> @@ -338,12 +398,12 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
>  }
>  
>  int
> -kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> +kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
>  {
> -	if (flags & KVM_IRQFD_FLAG_DEASSIGN)
> -		return kvm_irqfd_deassign(kvm, fd, gsi);
> +	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
> +		return kvm_irqfd_deassign(kvm, args);
>  
> -	return kvm_irqfd_assign(kvm, fd, gsi);
> +	return kvm_irqfd_assign(kvm, args);
>  }
>  
>  /*
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 02cb440..b4ad14cc 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2059,7 +2059,7 @@ static long kvm_vm_ioctl(struct file *filp,
>  		r = -EFAULT;
>  		if (copy_from_user(&data, argp, sizeof data))
>  			goto out;
> -		r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
> +		r = kvm_irqfd(kvm, &data);
>  		break;
>  	}
>  	case KVM_IOEVENTFD: {
> 

Looks reasonable to me.

Jan


Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux