Re: [RFC PATCH v2 2/2] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

Alex Williamson <alex.williamson@xxxxxxxxxx> · Tue, 25 Nov 2014 09:04:13 -0700

On Tue, 2014-11-25 at 20:23 +0800, Feng Wu wrote:
> This patch adds the kvm-vfio interface for VT-d Posted-Interrrupts.
> When guests updates MSI/MSI-x information for an assigned-device,
> QEMU will use KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to setup
> IRTE for VT-d PI. This patch implement this IRQ attribute.
> 
> Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
> ---
>  virt/kvm/vfio.c |  115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 115 insertions(+), 0 deletions(-)
> 
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index 6bc7001..435adf4 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -446,6 +446,115 @@ out:
>  	return ret;
>  }
>  
> +static int kvm_update_pi_irte(struct kvm *kvm, int host_irq, int guest_irq)
> +{
> +	/*
> +	 * TODO: need to add the real code to update the related IRTE,
> +	 * Basically, This fucntion will do the following things:
> +	 * - Get struct kvm_kernel_irq_routing_entry from guest irq
> +	 * - Get the destination vCPU of the interrupts
> +	 * - Update the IRTE according the VT-d PI Spec.
> +	 *   1) guest vector
> +	 *   2) Posted-Interrupts descritpor addresss
> +	 */
> +
> +	return 0;

Why not make this an arch_ call like Eric did?

> +}
> +
> +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
> +{
> +	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
> +		u8 pin;
> +		pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
> +		if (pin)
> +			return 1;
> +	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
> +		u8 pos;
> +		u16 flags;
> +
> +		pos = pdev->msi_cap;
> +		if (pos) {
> +			pci_read_config_word(pdev,
> +					     pos + PCI_MSI_FLAGS, &flags);
> +			return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
> +		}

Looks like a copy of vfio code, but since that was written helpers have
been added to the kernel:

return pci_msi_vec_count(pdev);

> +	} else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
> +		u8 pos;
> +		u16 flags;
> +
> +		pos = pdev->msix_cap;
> +		if (pos) {
> +			pci_read_config_word(pdev,
> +					     pos + PCI_MSIX_FLAGS, &flags);
> +
> +			return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
> +		}

return pci_msix_vec_count(pdev);

> +	} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
> +		if (pci_is_pcie(pdev))
> +			return 1;

This is a virtual interrupt, this interface should never be used for it.

> +
> +	return 0;
> +}
> +
> +static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user *argp)
> +{
> +	struct kvm_posted_intr pi_info;
> +	int *virq;
> +	unsigned long minsz;
> +	struct vfio_device *vdev;
> +	struct msi_desc *entry;
> +	struct device *dev;
> +	struct pci_dev *pdev;
> +	int i, max, ret;
> +
> +	minsz = offsetofend(struct kvm_posted_intr, count);
> +
> +	if (copy_from_user(&pi_info, (void __user *)argp, minsz))
> +		return -EFAULT;
> +
> +	if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS)
> +		return -EINVAL;
> +
> +	vdev = kvm_vfio_get_vfio_device(pi_info.fd);
> +	if (IS_ERR(vdev))
> +		return PTR_ERR(vdev);
> +
> +	dev = kvm_vfio_external_base_device(vdev);
> +	if (!dev)
> +		return -EFAULT;

Missing put

> +
> +	pdev = to_pci_dev(dev);

We can't assume the user called with a PCI device, test it.

if (!dev_is_pci(dev)) {
	put
	return errno
}

pdev = to_pci_dev(dev);
...

> +
> +	max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
> +
> +	if (pi_info.argsz - minsz < pi_info.count * sizeof(int) ||
> +	    pi_info.start >= max || pi_info.start + pi_info.count > max)
> +		return -EINVAL;

Missing put

> +
> +	virq = memdup_user((void __user *)((unsigned long)argp + minsz),
> +			   pi_info.count * sizeof(int));
> +	if (IS_ERR(virq))
> +		return PTR_ERR(virq);

put...

> +
> +	for (i=0; i<pi_info.count; i++) {
> +		list_for_each_entry(entry, &pdev->msi_list, list) {

This is unique to MSI/X but INTx and others can still make it here.
Also, this won't compile unless CONFIG_PCI_MSI.  Does anything protect
this list?

> +			if (entry->msi_attrib.entry_nr != pi_info.start+i)
> +				continue;
> +
> +			ret = kvm_update_pi_irte(kdev->kvm,
> +						 entry->irq, virq[i]);
> +			if (ret) {
> +				kfree(virq);
> +				return -EFAULT;

put...

> +			}
> +		}
> +	}
> +
> +	kfree(virq);

put...

> +
> +	return 0;
> +}
> +
>  static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
>  {
>  	int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
> @@ -456,6 +565,9 @@ static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
>  	case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
>  		ret = kvm_vfio_control_irq_forward(kdev, attr, argp);
>  		break;
> +	case KVM_DEV_VFIO_DEVICE_POSTING_IRQ:
> +		ret = kvm_vfio_set_pi(kdev, argp);
> +		break;

ARCH #ifdefs?

>  	default:
>  		ret = -ENXIO;
>  	}
> @@ -511,6 +623,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
>  		case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
>  			return 0;
>  #endif
> +		case KVM_DEV_VFIO_DEVICE_POSTING_IRQ:
> +			return 0;
> +

Same here

>  		}
>  		break;
>  	}

I only see setup where, what if we want to stop posted interrupts?  What
if we switch to a different mode, for example the guest reboots or
unloads the driver or switches to a different driver.  Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html