On Tue, 2014-11-25 at 20:23 +0800, Feng Wu wrote: > This patch adds the kvm-vfio interface for VT-d Posted-Interrrupts. > When guests updates MSI/MSI-x information for an assigned-device, > QEMU will use KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to setup > IRTE for VT-d PI. This patch implement this IRQ attribute. > > Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx> > --- > virt/kvm/vfio.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 115 insertions(+), 0 deletions(-) > > diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c > index 6bc7001..435adf4 100644 > --- a/virt/kvm/vfio.c > +++ b/virt/kvm/vfio.c > @@ -446,6 +446,115 @@ out: > return ret; > } > > +static int kvm_update_pi_irte(struct kvm *kvm, int host_irq, int guest_irq) > +{ > + /* > + * TODO: need to add the real code to update the related IRTE, > + * Basically, This fucntion will do the following things: > + * - Get struct kvm_kernel_irq_routing_entry from guest irq > + * - Get the destination vCPU of the interrupts > + * - Update the IRTE according the VT-d PI Spec. > + * 1) guest vector > + * 2) Posted-Interrupts descritpor addresss > + */ > + > + return 0; Why not make this an arch_ call like Eric did? > +} > + > +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type) > +{ > + if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { > + u8 pin; > + pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin); > + if (pin) > + return 1; > + } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { > + u8 pos; > + u16 flags; > + > + pos = pdev->msi_cap; > + if (pos) { > + pci_read_config_word(pdev, > + pos + PCI_MSI_FLAGS, &flags); > + return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1); > + } Looks like a copy of vfio code, but since that was written helpers have been added to the kernel: return pci_msi_vec_count(pdev); > + } else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) { > + u8 pos; > + u16 flags; > + > + pos = pdev->msix_cap; > + if (pos) { > + pci_read_config_word(pdev, > + pos + PCI_MSIX_FLAGS, &flags); > + > + return (flags & PCI_MSIX_FLAGS_QSIZE) + 1; > + } return pci_msix_vec_count(pdev); > + } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) > + if (pci_is_pcie(pdev)) > + return 1; This is a virtual interrupt, this interface should never be used for it. > + > + return 0; > +} > + > +static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user *argp) > +{ > + struct kvm_posted_intr pi_info; > + int *virq; > + unsigned long minsz; > + struct vfio_device *vdev; > + struct msi_desc *entry; > + struct device *dev; > + struct pci_dev *pdev; > + int i, max, ret; > + > + minsz = offsetofend(struct kvm_posted_intr, count); > + > + if (copy_from_user(&pi_info, (void __user *)argp, minsz)) > + return -EFAULT; > + > + if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS) > + return -EINVAL; > + > + vdev = kvm_vfio_get_vfio_device(pi_info.fd); > + if (IS_ERR(vdev)) > + return PTR_ERR(vdev); > + > + dev = kvm_vfio_external_base_device(vdev); > + if (!dev) > + return -EFAULT; Missing put > + > + pdev = to_pci_dev(dev); We can't assume the user called with a PCI device, test it. if (!dev_is_pci(dev)) { put return errno } pdev = to_pci_dev(dev); ... > + > + max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index); > + > + if (pi_info.argsz - minsz < pi_info.count * sizeof(int) || > + pi_info.start >= max || pi_info.start + pi_info.count > max) > + return -EINVAL; Missing put > + > + virq = memdup_user((void __user *)((unsigned long)argp + minsz), > + pi_info.count * sizeof(int)); > + if (IS_ERR(virq)) > + return PTR_ERR(virq); put... > + > + for (i=0; i<pi_info.count; i++) { > + list_for_each_entry(entry, &pdev->msi_list, list) { This is unique to MSI/X but INTx and others can still make it here. Also, this won't compile unless CONFIG_PCI_MSI. Does anything protect this list? > + if (entry->msi_attrib.entry_nr != pi_info.start+i) > + continue; > + > + ret = kvm_update_pi_irte(kdev->kvm, > + entry->irq, virq[i]); > + if (ret) { > + kfree(virq); > + return -EFAULT; put... > + } > + } > + } > + > + kfree(virq); put... > + > + return 0; > +} > + > static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg) > { > int32_t __user *argp = (int32_t __user *)(unsigned long)arg; > @@ -456,6 +565,9 @@ static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg) > case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: > ret = kvm_vfio_control_irq_forward(kdev, attr, argp); > break; > + case KVM_DEV_VFIO_DEVICE_POSTING_IRQ: > + ret = kvm_vfio_set_pi(kdev, argp); > + break; ARCH #ifdefs? > default: > ret = -ENXIO; > } > @@ -511,6 +623,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, > case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: > return 0; > #endif > + case KVM_DEV_VFIO_DEVICE_POSTING_IRQ: > + return 0; > + Same here > } > break; > } I only see setup where, what if we want to stop posted interrupts? What if we switch to a different mode, for example the guest reboots or unloads the driver or switches to a different driver. Thanks, Alex -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html