This patch adds the kvm-vfio interface for VT-d Posted-Interrupts. When guests update MSI/MSI-X information for an assigned device, QEMU will use the KVM_DEV_VFIO_DEVICE_POST_IRQ attribute to set up the IRTE for VT-d PI. A userspace program can also use KVM_DEV_VFIO_DEVICE_UNPOST_IRQ to change back to IRQ remapping mode. This patch implements these IRQ attributes. Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx> --- include/linux/kvm_host.h | 20 +++++++++ virt/kvm/vfio.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5cd4420..ca9a393 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1134,6 +1134,26 @@ static inline int kvm_arch_vfio_set_forward(struct kvm_fwd_irq *fwd_irq, } #endif +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST +/* + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts + * + * @kvm: kvm + * @host_irq: host irq of the interrupt + * @guest_irq: gsi of the interrupt + * @set: set or unset PI + * returns 0 on success, < 0 on failure + */ +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); +#else +static int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + return 0; +} +#endif + #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 6bc7001..dbc6c3b 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -446,6 +446,99 @@ out: return ret; } +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type) +{ + if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { + u8 pin; + + pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin); + if (pin) + return 1; + } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) + return pci_msi_vec_count(pdev); + else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) + return 
pci_msix_vec_count(pdev); + + return 0; +} + +static int kvm_vfio_control_pi(struct kvm_device *kdev, + int32_t __user *argp, bool set) +{ + struct kvm_vfio_dev_irq pi_info; + uint32_t *gsi; + unsigned long minsz; + struct vfio_device *vdev; + struct msi_desc *entry; + struct device *dev; + struct pci_dev *pdev; + int i, max, ret; + + minsz = offsetofend(struct kvm_vfio_dev_irq, count); + + if (copy_from_user(&pi_info, (void __user *)argp, minsz)) + return -EFAULT; + + if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS) + return -EINVAL; + + vdev = kvm_vfio_get_vfio_device(pi_info.fd); + if (IS_ERR(vdev)) + return PTR_ERR(vdev); + + dev = kvm_vfio_external_base_device(vdev); + if (!dev || !dev_is_pci(dev)) { + ret = -EFAULT; + goto put_vfio_device; + } + + pdev = to_pci_dev(dev); + + max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index); + if (max <= 0) { + ret = -EFAULT; + goto put_vfio_device; + } + + if (pi_info.argsz - minsz < pi_info.count * sizeof(u32) || + pi_info.start >= max || pi_info.start + pi_info.count > max) { + ret = -EINVAL; + goto put_vfio_device; + } + + gsi = memdup_user((void __user *)((unsigned long)argp + minsz), + pi_info.count * sizeof(u32)); + if (IS_ERR(gsi)) { + ret = PTR_ERR(gsi); + goto put_vfio_device; + } + +#ifdef CONFIG_PCI_MSI + for (i = 0; i < pi_info.count; i++) { + list_for_each_entry(entry, &pdev->msi_list, list) { + if (entry->msi_attrib.entry_nr != pi_info.start+i) + continue; + + ret = kvm_arch_vfio_update_pi_irte(kdev->kvm, + entry->irq, + gsi[i], + set); + if (ret) + goto free_gsi; + } + } +#endif + + ret = 0; + +free_gsi: + kfree(gsi); + +put_vfio_device: + kvm_vfio_put_vfio_device(vdev); + return ret; +} + static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg) { int32_t __user *argp = (int32_t __user *)(unsigned long)arg; @@ -456,6 +549,14 @@ static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg) case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: ret = 
kvm_vfio_control_irq_forward(kdev, attr, argp); break; +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST + case KVM_DEV_VFIO_DEVICE_POST_IRQ: + ret = kvm_vfio_control_pi(kdev, argp, 1); + break; + case KVM_DEV_VFIO_DEVICE_UNPOST_IRQ: + ret = kvm_vfio_control_pi(kdev, argp, 0); + break; +#endif default: ret = -ENXIO; } @@ -511,6 +612,12 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: return 0; #endif +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST + case KVM_DEV_VFIO_DEVICE_POST_IRQ: + case KVM_DEV_VFIO_DEVICE_UNPOST_IRQ: + return 0; +#endif + } break; } -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html