Re: [PATCH] vfio-pci: Virtualize PCIe & AF FLR

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2016-09-22 at 10:07 -0600, Alex Williamson wrote:
> We use a BAR restore trick to try to detect when a user has performed
> a device reset, possibly through FLR or other backdoors, to put things
> back into a working state.  This is important for backdoor resets, but
> we can actually just virtualize the "front door" resets provided via
> PCIe and AF FLR.  Set these bits as virtualized + writable, allowing
> the default write to set them in vconfig, then we can simply check the
> bit, perform an FLR of our own, and clear the bit.  We don't actually
> have the granularity in PCI to specify the type of reset we want to
> do, but generally devices don't implement both PCIe and AF FLR and
> we'll favor these over other types of reset, so we should generally
> lineup.  We do test whether the device provides the requested FLR type
> to stay consistent with hardware capabilities though.
> 
> This seems to fix several instance of devices getting into bad states
> with userspace drivers, like dpdk, running inside a VM.
> 
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> ---
>  drivers/vfio/pci/vfio_pci_config.c |   82 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 77 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
> index 688691d..3884888 100644
> --- a/drivers/vfio/pci/vfio_pci_config.c
> +++ b/drivers/vfio/pci/vfio_pci_config.c
> @@ -804,6 +804,40 @@ static int __init init_pci_cap_pcix_perm(struct perm_bits *perm)
>  	return 0;
>  }
>  
> +static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
> +				 int count, struct perm_bits *perm,
> +				 int offset, __le32 val)
> +{
> +	__le16 *ctrl = (__le16 *)(vdev->vconfig + pos -
> +				  offset + PCI_EXP_DEVCTL);
> +
> +	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
> +	if (count < 0)
> +		return count;
> +
> +	/*
> +	 * The FLR bit is virtualized, if set and the device supports PCIe
> +	 * FLR, issue a reset_function.  Regardless, clear the bit, the spec
> +	 * requires it to be always read as zero.  NB, reset_function might
> +	 * not use a PCIe FLR, we don't have that level of granularity.
> +	 */
> +	if (*ctrl & cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR)) {
> +		u32 cap;
> +		int ret;
> +
> +		*ctrl &= ~cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR);
> +
> +		ret = pci_user_read_config_dword(vdev->pdev,
> +						 pos - offset + PCI_EXP_DEVCAP,
> +						 &cap);
> +
> +		if (!ret && (cap & PCI_EXP_DEVCAP_FLR))
> +			pci_try_reset_function(vdev->pdev);
> +	}
> +
> +	return count;
> +}
> +
>  /* Permissions for PCI Express capability */
>  static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
>  {
> @@ -811,26 +845,64 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
>  	if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2))
>  		return -ENOMEM;
>  
> +	perm->writefn = vfio_exp_config_write;
> +
>  	p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
>  
>  	/*
> -	 * Allow writes to device control fields (includes FLR!)
> -	 * but not to devctl_phantom which could confuse IOMMU
> -	 * or to the ARI bit in devctl2 which is set at probe time
> +	 * Allow writes to device control fields, except devctl_phantom,
> +	 * which could confuse IOMMU, and the ARI bit in devctl2, which
> +	 * is set at probe time.  FLR gets virtualized via our writefn.
>  	 */
> -	p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM);
> +	p_setw(perm, PCI_EXP_DEVCTL,
> +	       PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
>  	p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
>  	return 0;
>  }
>  
> +static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
> +				int count, struct perm_bits *perm,
> +				int offset, __le32 val)
> +{
> +	u8 *ctrl = vdev->vconfig + pos - offset + PCI_AF_CTRL;
> +
> +	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
> +	if (count < 0)
> +		return count;
> +
> +	/*
> +	 * The FLR bit is virtualized, if set and the device supports AF
> +	 * FLR, issue a reset_function.  Regardless, clear the bit, the spec
> +	 * requires it to be always read as zero.  NB, reset_function might
> +	 * not use an AF FLR, we don't have that level of granularity.
> +	 */
> +	if (*ctrl & PCI_AF_CTRL_FLR) {
> +		u8 cap;
> +		int ret;
> +
> +		*ctrl &= ~PCI_AF_CTRL_FLR;
> +
> +		ret = pci_user_read_config_byte(vdev->pdev,
> +						pos - offset + PCI_AF_CAP,
> +						&cap);
> +
> +		if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP))
> +			pci_try_reset_function(vdev->pdev);
> +	}
> +
> +	return count;
> +}
> +
>  /* Permissions for Advanced Function capability */
>  static int __init init_pci_cap_af_perm(struct perm_bits *perm)
>  {
>  	if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF]))
>  		return -ENOMEM;
>  
> +	perm->writefn = vfio_af_config_write;
> +
>  	p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
> -	p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR);
> +	p_setb(perm, PCI_AF_CTRL, PCI_AF_CTRL_FLR, PCI_AF_CTRL_FLR);
>  	return 0;
>  }
>  
> 

Looks good.

Reviewed-by: Greg Rose <grose@xxxxxxxxxxxxxx>


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux