On Tue, Nov 02, 2010 at 04:49:18PM +0100, Jan Kiszka wrote: > This improves the IRQ forwarding for assigned devices: By using the > kernel's threaded IRQ scheme, we can get rid of the latency-prone work > queue and simplify the code in the same run. Interesting. Do you see any latency improvements from this? > Moreover, we no longer have to hold assigned_dev_lock while raising the > guest IRQ, which can be a lengthy operation as we may have to iterate > over all VCPUs. The lock is now only used for synchronizing masking vs. > unmasking of INTx-type IRQs, thus is renamed to intx_lock. > > Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> > --- > include/linux/kvm_host.h | 12 +---- > virt/kvm/assigned-dev.c | 106 ++++++++++++++------------------------------- > 2 files changed, 35 insertions(+), 83 deletions(-) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 462b982..a786419 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -456,16 +456,8 @@ struct kvm_irq_ack_notifier { > void (*irq_acked)(struct kvm_irq_ack_notifier *kian); > }; > > -#define KVM_ASSIGNED_MSIX_PENDING 0x1 > -struct kvm_guest_msix_entry { > - u32 vector; > - u16 entry; > - u16 flags; > -}; > - > struct kvm_assigned_dev_kernel { > struct kvm_irq_ack_notifier ack_notifier; > - struct work_struct interrupt_work; > struct list_head list; > int assigned_dev_id; > int host_segnr; > @@ -476,13 +468,13 @@ struct kvm_assigned_dev_kernel { > bool host_irq_disabled; > struct msix_entry *host_msix_entries; > int guest_irq; > - struct kvm_guest_msix_entry *guest_msix_entries; > + struct msix_entry *guest_msix_entries; > unsigned long irq_requested_type; > int irq_source_id; > int flags; > struct pci_dev *dev; > struct kvm *kvm; > - spinlock_t assigned_dev_lock; > + spinlock_t intx_lock; > }; > > struct kvm_irq_mask_notifier { > diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c > index ecc4419..d0b07ea 100644 > --- a/virt/kvm/assigned-dev.c > 
+++ b/virt/kvm/assigned-dev.c > @@ -55,58 +55,31 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel > return index; > } > > -static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) > +static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) > { > - struct kvm_assigned_dev_kernel *assigned_dev; > - int i; > + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; > + u32 vector; > + int index; > > - assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, > - interrupt_work); > + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { > + spin_lock(&assigned_dev->intx_lock); > + disable_irq_nosync(irq); > + assigned_dev->host_irq_disabled = true; > + spin_unlock(&assigned_dev->intx_lock); > + } > > - spin_lock_irq(&assigned_dev->assigned_dev_lock); > if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { > - struct kvm_guest_msix_entry *guest_entries = > - assigned_dev->guest_msix_entries; > - for (i = 0; i < assigned_dev->entries_nr; i++) { > - if (!(guest_entries[i].flags & > - KVM_ASSIGNED_MSIX_PENDING)) > - continue; > - guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; > + index = find_index_from_host_irq(assigned_dev, irq); > + if (index >= 0) { > + vector = assigned_dev-> > + guest_msix_entries[index].vector; > kvm_set_irq(assigned_dev->kvm, > - assigned_dev->irq_source_id, > - guest_entries[i].vector, 1); > + assigned_dev->irq_source_id, vector, 1); > } > } else > kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, > assigned_dev->guest_irq, 1); > > - spin_unlock_irq(&assigned_dev->assigned_dev_lock); > -} > - > -static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) > -{ > - unsigned long flags; > - struct kvm_assigned_dev_kernel *assigned_dev = > - (struct kvm_assigned_dev_kernel *) dev_id; > - > - spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); > - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { > - int index = 
find_index_from_host_irq(assigned_dev, irq); > - if (index < 0) > - goto out; > - assigned_dev->guest_msix_entries[index].flags |= > - KVM_ASSIGNED_MSIX_PENDING; > - } > - > - schedule_work(&assigned_dev->interrupt_work); > - > - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { > - disable_irq_nosync(irq); > - assigned_dev->host_irq_disabled = true; > - } > - > -out: > - spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); > return IRQ_HANDLED; > } > > @@ -114,7 +87,6 @@ out: > static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) > { > struct kvm_assigned_dev_kernel *dev; > - unsigned long flags; > > if (kian->gsi == -1) > return; > @@ -127,12 +99,12 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) > /* The guest irq may be shared so this ack may be > * from another device. > */ > - spin_lock_irqsave(&dev->assigned_dev_lock, flags); > + spin_lock(&dev->intx_lock); > if (dev->host_irq_disabled) { > enable_irq(dev->host_irq); > dev->host_irq_disabled = false; > } > - spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); > + spin_unlock(&dev->intx_lock); > } > > static void deassign_guest_irq(struct kvm *kvm, > @@ -155,28 +127,19 @@ static void deassign_host_irq(struct kvm *kvm, > struct kvm_assigned_dev_kernel *assigned_dev) > { > /* > - * In kvm_free_device_irq, cancel_work_sync return true if: > - * 1. work is scheduled, and then cancelled. > - * 2. work callback is executed. > - * > - * The first one ensured that the irq is disabled and no more events > - * would happen. But for the second one, the irq may be enabled (e.g. > - * for MSI). So we disable irq here to prevent further events. > + * We disable irq here to prevent further events. > * > * Notice this maybe result in nested disable if the interrupt type is > * INTx, but it's OK for we are going to free it. 
> * > * If this function is a part of VM destroy, please ensure that till > * now, the kvm state is still legal for probably we also have to wait > - * interrupt_work done. > + * on a currently running IRQ handler. > */ > if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { > int i; > for (i = 0; i < assigned_dev->entries_nr; i++) > - disable_irq_nosync(assigned_dev-> > - host_msix_entries[i].vector); > - > - cancel_work_sync(&assigned_dev->interrupt_work); > + disable_irq(assigned_dev->host_msix_entries[i].vector); > > for (i = 0; i < assigned_dev->entries_nr; i++) > free_irq(assigned_dev->host_msix_entries[i].vector, > @@ -188,8 +151,7 @@ static void deassign_host_irq(struct kvm *kvm, > pci_disable_msix(assigned_dev->dev); > } else { > /* Deal with MSI and INTx */ > - disable_irq_nosync(assigned_dev->host_irq); > - cancel_work_sync(&assigned_dev->interrupt_work); > + disable_irq(assigned_dev->host_irq); > > free_irq(assigned_dev->host_irq, (void *)assigned_dev); > > @@ -268,8 +230,8 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, > * on the same interrupt line is not a happy situation: there > * are going to be long delays in accepting, acking, etc. 
> */ > - if (request_irq(dev->host_irq, kvm_assigned_dev_intr, > - 0, "kvm_assigned_intx_device", (void *)dev)) > + if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, > + 0, "kvm_assigned_intx_device", (void *)dev)) > return -EIO; > return 0; > } > @@ -287,8 +249,8 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, > } > > dev->host_irq = dev->dev->irq; > - if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, > - "kvm_assigned_msi_device", (void *)dev)) { > + if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, > + 0, "kvm_assigned_msi_device", (void *)dev)) { > pci_disable_msi(dev->dev); > return -EIO; > } > @@ -313,10 +275,10 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, > return r; > > for (i = 0; i < dev->entries_nr; i++) { > - r = request_irq(dev->host_msix_entries[i].vector, > - kvm_assigned_dev_intr, 0, > - "kvm_assigned_msix_device", > - (void *)dev); > + r = request_threaded_irq(dev->host_msix_entries[i].vector, > + NULL, kvm_assigned_dev_thread, > + 0, "kvm_assigned_msix_device", > + (void *)dev); > if (r) > goto err; > } > @@ -557,12 +519,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, > match->host_devfn = assigned_dev->devfn; > match->flags = assigned_dev->flags; > match->dev = dev; > - spin_lock_init(&match->assigned_dev_lock); > + spin_lock_init(&match->intx_lock); > match->irq_source_id = -1; > match->kvm = kvm; > match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; > - INIT_WORK(&match->interrupt_work, > - kvm_assigned_dev_interrupt_work_handler); > > list_add(&match->list, &kvm->arch.assigned_dev_head); > > @@ -654,9 +614,9 @@ static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, > r = -ENOMEM; > goto msix_nr_out; > } > - adev->guest_msix_entries = kzalloc( > - sizeof(struct kvm_guest_msix_entry) * > - entry_nr->entry_nr, GFP_KERNEL); > + adev->guest_msix_entries = > + kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr, > + GFP_KERNEL); > if 
(!adev->guest_msix_entries) { > kfree(adev->host_msix_entries); > r = -ENOMEM; > -- > 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html