PCI 2.3 allows to generically disable IRQ sources at device level. This enables us to share IRQs of such devices between on the host side when passing them to a guest. Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> --- include/linux/kvm_host.h | 1 + virt/kvm/assigned-dev.c | 194 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 180 insertions(+), 15 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 46120da..fdc2bd9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -466,6 +466,7 @@ struct kvm_assigned_dev_kernel { unsigned int entries_nr; int host_irq; bool host_irq_disabled; + bool pci_2_3; struct msix_entry *host_msix_entries; int guest_irq; struct msix_entry *guest_msix_entries; diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index ca402ed..91fe9c8 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -55,17 +55,145 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel return index; } +/* + * Verify that the device supports Interrupt Disable bit in command register, + * per PCI 2.3, by flipping this bit and reading it back: this bit was readonly + * in PCI 2.2. + */ +static bool pci_2_3_supported(struct pci_dev *pdev) +{ + bool supported = false; + u16 orig, new; + + pci_block_user_cfg_access(pdev); + pci_read_config_word(pdev, PCI_COMMAND, &orig); + pci_write_config_word(pdev, PCI_COMMAND, + orig ^ PCI_COMMAND_INTX_DISABLE); + pci_read_config_word(pdev, PCI_COMMAND, &new); + + /* + * There's no way to protect against + * hardware bugs or detect them reliably, but as long as we know + * what the value should be, let's go ahead and check it. + */ + if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) { + dev_err(&pdev->dev, "Command changed from 0x%x to 0x%x: " + "driver or HW bug?\n", orig, new); + goto out; + } + if (!((new ^ orig) & PCI_COMMAND_INTX_DISABLE)) { + dev_warn(&pdev->dev, "Device does not support " + "disabling interrupts: unable to bind.\n"); + goto out; + } + supported = true; + + /* Now restore the original value. */ + pci_write_config_word(pdev, PCI_COMMAND, orig); + +out: + pci_unblock_user_cfg_access(pdev); + return supported; +} + +static unsigned int +pci_2_3_set_irq_mask(struct pci_dev *dev, bool mask, bool check_status) +{ + u32 cmd_status_dword; + u16 origcmd, newcmd; + unsigned int status; + + /* + * We do a single dword read to retrieve both command and status. + * Document assumptions that make this possible. + */ + BUILD_BUG_ON(PCI_COMMAND % 4); + BUILD_BUG_ON(PCI_COMMAND + 2 != PCI_STATUS); + + pci_block_user_cfg_access(dev); + + /* + * Read both command and status registers in a single 32-bit operation. + * Note: we could cache the value for command and move the status read + * out of the lock if there was a way to get notified of user changes + * to command register through sysfs. Should be good for shared irqs. + */ + pci_read_config_dword(dev, PCI_COMMAND, &cmd_status_dword); + origcmd = cmd_status_dword; + status = cmd_status_dword >> 16; + + if (check_status) { + bool irq_pending = status & PCI_STATUS_INTERRUPT; + + /* + * Check interrupt status register to see whether our device + * triggered the interrupt (when masking) or the next IRQ is + * already pending (when unmasking). + */ + if (!(mask == irq_pending)) + goto done; + } + + newcmd = origcmd & ~PCI_COMMAND_INTX_DISABLE; + if (mask) + newcmd |= PCI_COMMAND_INTX_DISABLE; + if (newcmd != origcmd) + pci_write_config_word(dev, PCI_COMMAND, newcmd); + +done: + pci_unblock_user_cfg_access(dev); + return status; +} + +static void pci_2_3_irq_mask(struct pci_dev *dev) +{ + pci_2_3_set_irq_mask(dev, true, false); +} + +static unsigned int pci_2_3_irq_check_and_mask(struct pci_dev *dev) +{ + return pci_2_3_set_irq_mask(dev, true, true); +} + +static void pci_2_3_irq_unmask(struct pci_dev *dev) +{ + pci_2_3_set_irq_mask(dev, false, false); +} + +static unsigned int pci_2_3_irq_check_and_unmask(struct pci_dev *dev) +{ + return pci_2_3_set_irq_mask(dev, false, true); +} + +static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int ret = IRQ_WAKE_THREAD; + + spin_lock(&assigned_dev->intx_lock); + if (assigned_dev->host_irq_disabled || + !(pci_2_3_irq_check_and_mask(assigned_dev->dev) & + PCI_STATUS_INTERRUPT)) + ret = IRQ_NONE; + else + assigned_dev->host_irq_disabled = true; + spin_unlock(&assigned_dev->intx_lock); + + return ret; +} + static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; u32 vector; int index; - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { - spin_lock(&assigned_dev->intx_lock); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX && + !assigned_dev->pci_2_3) { + spin_lock_irq(&assigned_dev->intx_lock); disable_irq_nosync(irq); assigned_dev->host_irq_disabled = true; - spin_unlock(&assigned_dev->intx_lock); + spin_unlock_irq(&assigned_dev->intx_lock); } if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { @@ -87,6 +215,7 @@ static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) { struct kvm_assigned_dev_kernel *dev; + bool reassert = false; if (kian->gsi == -1) return; @@ -99,12 +228,23 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) /* The guest irq may be shared so this ack may be * from another device. */ - spin_lock(&dev->intx_lock); + spin_lock_irq(&dev->intx_lock); if (dev->host_irq_disabled) { - enable_irq(dev->host_irq); + if (dev->pci_2_3) { + if (pci_2_3_irq_check_and_unmask(dev->dev) & + PCI_STATUS_INTERRUPT) { + reassert = true; + goto out; + } + } else + enable_irq(dev->host_irq); dev->host_irq_disabled = false; } - spin_unlock(&dev->intx_lock); +out: + spin_unlock_irq(&dev->intx_lock); + + if (reassert) + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 1); } static void deassign_guest_irq(struct kvm *kvm, @@ -151,7 +291,11 @@ static void deassign_host_irq(struct kvm *kvm, pci_disable_msix(assigned_dev->dev); } else { /* Deal with MSI and INTx */ - disable_irq(assigned_dev->host_irq); + if (assigned_dev->pci_2_3) { + pci_2_3_irq_mask(assigned_dev->dev); + synchronize_irq(assigned_dev->host_irq); + } else + disable_irq(assigned_dev->host_irq); free_irq(assigned_dev->host_irq, (void *)assigned_dev); @@ -199,6 +343,13 @@ static void kvm_free_assigned_device(struct kvm *kvm, pci_reset_function(assigned_dev->dev); + /* + * Unmask the IRQ at PCI level once the reset is done - the next user + * may not expect the IRQ being masked. + */ + if (assigned_dev->pci_2_3) + pci_2_3_irq_unmask(assigned_dev->dev); + pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); pci_dev_put(assigned_dev->dev); @@ -224,15 +375,29 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) static int assigned_device_enable_host_intx(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev) { + irq_handler_t irq_handler = NULL; + unsigned long flags = 0; + dev->host_irq = dev->dev->irq; - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. + + /* + * We can only share the IRQ line with other host devices if we are + * able to disable the IRQ source at device-level - independently of + * the guest driver. Otherwise host devices may suffer from unbounded + * IRQ latencies when the guest keeps the line asserted. */ - if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, - 0, dev->irq_name, (void *)dev)) + dev->pci_2_3 = pci_2_3_supported(dev->dev); + if (dev->pci_2_3) { + irq_handler = kvm_assigned_dev_intr; + flags = IRQF_SHARED; + } + if (request_threaded_irq(dev->host_irq, irq_handler, + kvm_assigned_dev_thread, flags, + dev->irq_name, (void *)dev)) return -EIO; + + if (dev->pci_2_3) + pci_2_3_irq_unmask(dev->dev); return 0; } @@ -308,7 +473,6 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm, { dev->guest_irq = irq->guest_irq; dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; return 0; } #endif @@ -320,7 +484,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm, { dev->guest_irq = irq->guest_irq; dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; return 0; } #endif @@ -354,6 +517,7 @@ static int assign_host_irq(struct kvm *kvm, default: r = -EINVAL; } + dev->host_irq_disabled = false; if (!r) dev->irq_requested_type |= host_irq_type; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html