This patch adds the kvm-vfio interface for VT-d Posted-Interrupts. When a guest updates the MSI/MSI-X information for an assigned device, QEMU uses the KVM_DEV_VFIO_DEVICE_POST_IRQ attribute to set up the IRTE for VT-d PI. A userspace program can also use KVM_DEV_VFIO_DEVICE_UNPOST_IRQ to switch back to IRQ remapping mode. This patch implements these IRQ attributes. Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx> --- include/linux/kvm_host.h | 22 +++++++++ virt/kvm/vfio.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f591f7c..69f8711 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1073,6 +1073,28 @@ extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_arm_vgic_v3_ops; +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST +/* + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts + * + * @kvm: kvm + * @host_irq: host irq of the interrupt + * @guest_irq: gsi of the interrupt + * @set: set or unset PI + * returns 0 on success, < 0 on failure + */ +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); +#else +static inline int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, + unsigned int host_irq, + uint32_t guest_irq, + bool set) +{ + return 0; +} +#endif + #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 80a45e4..547fc51 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/vfio.h> +#include <asm/irq_remapping.h> #include "vfio.h" struct kvm_vfio_group { @@ -276,12 +277,128 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) return -ENXIO; } +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, 
int irq_type) +{ + if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { + u8 pin; + + pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin); + if (pin) + return 1; + } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { + return pci_msi_vec_count(pdev); + } else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) { + return pci_msix_vec_count(pdev); + } + + return 0; +} + +static int kvm_vfio_control_pi(struct kvm_device *kdev, + int32_t __user *argp, bool set) +{ + struct kvm_vfio_dev_irq pi_info; + uint32_t *gsi; + unsigned long minsz; + struct vfio_device *vdev; + struct msi_desc *entry; + struct device *dev; + struct pci_dev *pdev; + int i, max, ret; + + minsz = offsetofend(struct kvm_vfio_dev_irq, count); + + if (copy_from_user(&pi_info, (void __user *)argp, minsz)) + return -EFAULT; + + if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS) + return -EINVAL; + + vdev = kvm_vfio_get_vfio_device(pi_info.fd); + if (IS_ERR(vdev)) + return PTR_ERR(vdev); + + dev = kvm_vfio_external_base_device(vdev); + if (!dev || !dev_is_pci(dev)) { + ret = -EFAULT; + goto put_vfio_device; + } + + pdev = to_pci_dev(dev); + + max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index); + if (max <= 0) { + ret = -EFAULT; + goto put_vfio_device; + } + + if (pi_info.argsz - minsz < pi_info.count * sizeof(u32) || + pi_info.start >= max || pi_info.start + pi_info.count > max) { + ret = -EINVAL; + goto put_vfio_device; + } + + gsi = memdup_user((void __user *)((unsigned long)argp + minsz), + pi_info.count * sizeof(u32)); + if (IS_ERR(gsi)) { + ret = PTR_ERR(gsi); + goto put_vfio_device; + } + +#ifdef CONFIG_PCI_MSI + for (i = 0; i < pi_info.count; i++) { + list_for_each_entry(entry, &pdev->msi_list, list) { + if (entry->msi_attrib.entry_nr != pi_info.start+i) + continue; + + ret = kvm_arch_vfio_update_pi_irte(kdev->kvm, + entry->irq, + gsi[i], + set); + if (ret) + goto free_gsi; + } + } +#endif + + ret = 0; + +free_gsi: + kfree(gsi); + +put_vfio_device: + kvm_vfio_put_vfio_device(vdev); + return ret; +} + 
+static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg) +{ + int32_t __user *argp = (int32_t __user *)(unsigned long)arg; + int ret; + + switch (attr) { +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST + case KVM_DEV_VFIO_DEVICE_POST_IRQ: + ret = kvm_vfio_control_pi(kdev, argp, 1); + break; + case KVM_DEV_VFIO_DEVICE_UNPOST_IRQ: + ret = kvm_vfio_control_pi(kdev, argp, 0); + break; +#endif + default: + ret = -ENXIO; + } + return ret; +} + static int kvm_vfio_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { switch (attr->group) { case KVM_DEV_VFIO_GROUP: return kvm_vfio_set_group(dev, attr->attr, attr->addr); + case KVM_DEV_VFIO_DEVICE: + return kvm_vfio_set_device(dev, attr->attr, attr->addr); } return -ENXIO; @@ -299,6 +416,15 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, } break; + case KVM_DEV_VFIO_DEVICE: + switch (attr->attr) { +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST + case KVM_DEV_VFIO_DEVICE_POST_IRQ: + case KVM_DEV_VFIO_DEVICE_UNPOST_IRQ: + return irq_remapping_cap(IRQ_POSTING_CAP) ? 0 : -ENXIO; +#endif + } + break; } return -ENXIO; -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html