[PATCH 3/3] KVM: Allow host IRQ sharing for passed-through PCI 2.3 devices

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>

PCI 2.3 allows to generically disable IRQ sources at device level. This
enables us to share IRQs of such devices between on the host side when
passing them to a guest.

Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
---
 include/linux/kvm_host.h |    1 +
 virt/kvm/assigned-dev.c  |  153 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 140 insertions(+), 14 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index df5497f..fcdc849 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -473,6 +473,7 @@ struct kvm_assigned_dev_kernel {
 	unsigned int entries_nr;
 	int host_irq;
 	bool host_irq_disabled;
+	bool pci_2_3;
 	struct msix_entry *host_msix_entries;
 	int guest_irq;
 	struct kvm_guest_msix_entry *guest_msix_entries;
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index d3ddfea..411643c 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -55,10 +55,96 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
 	return index;
 }
 
+/*
+ * Verify that the device supports Interrupt Disable bit in command register,
+ * per PCI 2.3, by flipping this bit and reading it back: this bit was readonly
+ * in PCI 2.2.
+ */
+static bool pci_2_3_supported(struct pci_dev *pdev)
+{
+	u16 orig, new;
+	bool supported = false;
+
+	pci_block_user_cfg_access(pdev);
+	pci_read_config_word(pdev, PCI_COMMAND, &orig);
+	pci_write_config_word(pdev, PCI_COMMAND,
+			      orig ^ PCI_COMMAND_INTX_DISABLE);
+	pci_read_config_word(pdev, PCI_COMMAND, &new);
+	
+	/*
+	 * There's no way to protect against
+	 * hardware bugs or detect them reliably, but as long as we know
+	 * what the value should be, let's go ahead and check it.
+	 */
+	if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
+		dev_err(&pdev->dev, "Command changed from 0x%x to 0x%x: "
+			"driver or HW bug?\n", orig, new);
+		goto out;
+	}
+	if (!((new ^ orig) & PCI_COMMAND_INTX_DISABLE)) {
+		dev_warn(&pdev->dev, "Device does not support "
+			 "disabling interrupts: unable to bind.\n");
+		goto out;
+	}
+	supported = true;
+
+	/* Now restore the original value. */
+	pci_write_config_word(pdev, PCI_COMMAND, orig);
+
+out:
+	pci_unblock_user_cfg_access(pdev);
+	return supported;
+}
+
+static void
+pci_2_3_mask_irq(struct pci_dev *dev, int mask, unsigned int *irq_status)
+{
+	u32 cmd_status_dword;
+	u16 origcmd, newcmd;
+
+	/*
+	 * We do a single dword read to retrieve both command and status.
+	 * Document assumptions that make this possible.
+	 */
+	BUILD_BUG_ON(PCI_COMMAND % 4);
+	BUILD_BUG_ON(PCI_COMMAND + 2 != PCI_STATUS);
+
+	pci_block_user_cfg_access(dev);
+
+	/*
+	 * Read both command and status registers in a single 32-bit operation.
+	 * Note: we could cache the value for command and move the status read
+	 * out of the lock if there was a way to get notified of user changes
+	 * to command register through sysfs. Should be good for shared irqs.
+	 */
+	pci_read_config_dword(dev, PCI_COMMAND, &cmd_status_dword);
+	origcmd = cmd_status_dword;
+
+	if (irq_status) {
+		/*
+		* Check interrupt status register to see whether our device triggered
+		* the interrupt.
+		*/
+		*irq_status = (cmd_status_dword >> 16) & PCI_STATUS_INTERRUPT;
+		if (*irq_status == 0)
+			goto done;
+	}
+
+	newcmd = origcmd & ~PCI_COMMAND_INTX_DISABLE;
+	if (mask)
+		newcmd |= PCI_COMMAND_INTX_DISABLE;
+	if (newcmd != origcmd)
+		pci_write_config_word(dev, PCI_COMMAND, newcmd);
+
+done:
+	pci_unblock_user_cfg_access(dev);
+}
+
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
+	int ret = IRQ_HANDLED;
 	unsigned long flags;
 
 	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
@@ -83,19 +169,34 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 				    guest_entries[i].vector, 1);
 		}
 	} else {
-		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-			    assigned_dev->guest_irq, 1);
-
 		if (assigned_dev->irq_requested_type &
 				KVM_DEV_IRQ_GUEST_INTX) {
-			disable_irq_nosync(irq);
+			if (assigned_dev->pci_2_3) {
+				unsigned int irq_status;
+
+				if (assigned_dev->host_irq_disabled) {
+					ret = IRQ_NONE;
+					goto out;
+				}
+
+				pci_2_3_mask_irq(assigned_dev->dev, 1,
+						 &irq_status);
+				if (irq_status == 0) {
+					ret = IRQ_NONE;
+					goto out;
+				}
+			} else
+				disable_irq_nosync(irq);
 			assigned_dev->host_irq_disabled = true;
 		}
+
+		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
 	}
 
 out:
 	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
-	return IRQ_HANDLED;
+	return ret;
 }
 
 /* Ack the irq line for an assigned device */
@@ -117,7 +218,10 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 	 */
 	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
 	if (dev->host_irq_disabled) {
-		enable_irq(dev->host_irq);
+		if (dev->pci_2_3)
+			pci_2_3_mask_irq(dev->dev, 0, NULL);
+		else
+			enable_irq(dev->host_irq);
 		dev->host_irq_disabled = false;
 	}
 	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
@@ -166,7 +270,11 @@ static void deassign_host_irq(struct kvm *kvm,
 		pci_disable_msix(assigned_dev->dev);
 	} else {
 		/* Deal with MSI and INTx */
-		disable_irq(assigned_dev->host_irq);
+		if (assigned_dev->pci_2_3) {
+			pci_2_3_mask_irq(assigned_dev->dev, 1, NULL);
+			synchronize_irq(assigned_dev->host_irq);
+		} else
+			disable_irq(assigned_dev->host_irq);
 
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
@@ -214,6 +322,13 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 
 	pci_reset_function(assigned_dev->dev);
 
+	/*
+	 * Unmask the IRQ at PCI level once the reset is done - the next user
+	 * may not expect the IRQ being masked.
+	 */
+	if (assigned_dev->pci_2_3)
+		pci_2_3_mask_irq(assigned_dev->dev, 0, NULL);
+
 	pci_release_regions(assigned_dev->dev);
 	pci_disable_device(assigned_dev->dev);
 	pci_dev_put(assigned_dev->dev);
@@ -239,15 +354,26 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
 static int assigned_device_enable_host_intx(struct kvm *kvm,
 					    struct kvm_assigned_dev_kernel *dev)
 {
+	unsigned long flags = 0;
+
 	dev->host_irq = dev->dev->irq;
-	/* Even though this is PCI, we don't want to use shared
-	 * interrupts. Sharing host devices with guest-assigned devices
-	 * on the same interrupt line is not a happy situation: there
-	 * are going to be long delays in accepting, acking, etc.
+
+	/*
+	 * We can only share the IRQ line with other host devices if we are
+	 * able to disable the IRQ source at device-level - independently of
+	 * the guest driver. Otherwise host devices may suffer from unbounded
+	 * IRQ latencies when the guest keeps the line asserted.
 	 */
+	dev->pci_2_3 = pci_2_3_supported(dev->dev);
+	if (dev->pci_2_3)
+		flags = IRQF_SHARED;
+
 	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
-			0, "kvm_assigned_intx_device", (void *)dev))
+			flags, "kvm_assigned_intx_device", (void *)dev))
 		return -EIO;
+
+	if (dev->pci_2_3)
+		pci_2_3_mask_irq(dev->dev, 0, NULL);
 	return 0;
 }
 
@@ -324,7 +450,6 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
-	dev->host_irq_disabled = false;
 	return 0;
 }
 #endif
@@ -336,7 +461,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
-	dev->host_irq_disabled = false;
 	return 0;
 }
 #endif
@@ -367,6 +491,7 @@ static int assign_host_irq(struct kvm *kvm,
 	default:
 		r = -EINVAL;
 	}
+	dev->host_irq_disabled = false;
 
 	if (!r)
 		dev->irq_requested_type |= host_irq_type;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux