Re: [RFC PATCH v2 4/5] vfio/pci: Invalidate mmaps and block the access in D3hot power state

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 24 Jan 2022 23:47:25 +0530
Abhishek Sahu <abhsahu@xxxxxxxxxx> wrote:

> According to [PCIe v5 5.3.1.4.1] for D3hot state
> 
>  "Configuration and Message requests are the only TLPs accepted by a
>   Function in the D3Hot state. All other received Requests must be
>   handled as Unsupported Requests, and all received Completions may
>   optionally be handled as Unexpected Completions."
> 
> Currently, if the vfio PCI device has been put into D3hot state and if
> user makes non-config related read/write request in D3hot state, these
> requests will be forwarded to the host and this access may cause
> issues on a few systems.
> 
> This patch leverages the memory-disable support added in commit
> 'abafbc551fdd ("vfio-pci: Invalidate mmaps and block MMIO access on
> disabled memory")' to generate page fault on mmap access and
> return error for the direct read/write. If the device is D3hot state,
> then the error needs to be returned for all kinds of BAR
> related access (memory, IO and ROM). Also, the power related structure
> fields need to be protected so we can use the same 'memory_lock' to
> protect these fields also. For the few cases, this 'memory_lock' will be
> already acquired by callers so introduce a separate function
> vfio_pci_set_power_state_locked(). The original
> vfio_pci_set_power_state() now contains the code to do the locking
> related operations.
> 
> Signed-off-by: Abhishek Sahu <abhsahu@xxxxxxxxxx>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 47 +++++++++++++++++++++++++-------
>  drivers/vfio/pci/vfio_pci_rdwr.c | 20 ++++++++++----
>  2 files changed, 51 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index ee2fb8af57fa..38440d48973f 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -201,11 +201,12 @@ static void vfio_pci_probe_power_state(struct vfio_pci_core_device *vdev)
>  }
>  
>  /*
> - * pci_set_power_state() wrapper handling devices which perform a soft reset on
> - * D3->D0 transition.  Save state prior to D0/1/2->D3, stash it on the vdev,
> - * restore when returned to D0.  Saved separately from pci_saved_state for use
> - * by PM capability emulation and separately from pci_dev internal saved state
> - * to avoid it being overwritten and consumed around other resets.
> + * vfio_pci_set_power_state_locked() wrapper handling devices which perform a
> + * soft reset on D3->D0 transition.  Save state prior to D0/1/2->D3, stash it
> + * on the vdev, restore when returned to D0.  Saved separately from
> + * pci_saved_state for use by PM capability emulation and separately from
> + * pci_dev internal saved state to avoid it being overwritten and consumed
> + * around other resets.
>   *
>   * There are few cases where the PCI power state can be changed to D0
>   * without the involvement of this API. So, cache the power state locally
> @@ -215,7 +216,8 @@ static void vfio_pci_probe_power_state(struct vfio_pci_core_device *vdev)
>   * The memory taken for saving this PCI state needs to be freed to
>   * prevent memory leak.
>   */
> -int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t state)
> +static int vfio_pci_set_power_state_locked(struct vfio_pci_core_device *vdev,
> +					   pci_power_t state)
>  {
>  	struct pci_dev *pdev = vdev->pdev;
>  	bool needs_restore = false, needs_save = false;
> @@ -260,6 +262,26 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat
>  	return ret;
>  }
>  
> +/*
> + * vfio_pci_set_power_state() takes all the required locks to protect
> + * the access of power related variables and then invokes
> + * vfio_pci_set_power_state_locked().
> + */
> +int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
> +			     pci_power_t state)
> +{
> +	int ret;
> +
> +	if (state >= PCI_D3hot)
> +		vfio_pci_zap_and_down_write_memory_lock(vdev);
> +	else
> +		down_write(&vdev->memory_lock);
> +
> +	ret = vfio_pci_set_power_state_locked(vdev, state);
> +	up_write(&vdev->memory_lock);
> +	return ret;
> +}
> +
>  int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
>  {
>  	struct pci_dev *pdev = vdev->pdev;
> @@ -354,7 +376,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
>  	 * in running the logic needed for D0 power state. The subsequent
>  	 * runtime PM API's will put the device into the low power state again.
>  	 */
> -	vfio_pci_set_power_state(vdev, PCI_D0);
> +	vfio_pci_set_power_state_locked(vdev, PCI_D0);
>  
>  	/* Stop the device from further DMA */
>  	pci_clear_master(pdev);
> @@ -967,7 +989,7 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
>  		 * interaction. Update the power state in vfio driver to perform
>  		 * the logic needed for D0 power state.
>  		 */
> -		vfio_pci_set_power_state(vdev, PCI_D0);
> +		vfio_pci_set_power_state_locked(vdev, PCI_D0);
>  		up_write(&vdev->memory_lock);
>  
>  		return ret;
> @@ -1453,6 +1475,11 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
>  		goto up_out;
>  	}
>  
> +	if (vdev->power_state >= PCI_D3hot) {
> +		ret = VM_FAULT_SIGBUS;
> +		goto up_out;
> +	}
> +
>  	/*
>  	 * We populate the whole vma on fault, so we need to test whether
>  	 * the vma has already been mapped, such as for concurrent faults
> @@ -1902,7 +1929,7 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
>  	 * be able to get to D3.  Therefore first do a D0 transition
>  	 * before enabling runtime PM.
>  	 */
> -	vfio_pci_set_power_state(vdev, PCI_D0);
> +	vfio_pci_set_power_state_locked(vdev, PCI_D0);
>  	pm_runtime_allow(&pdev->dev);
>  
>  	if (!disable_idle_d3)
> @@ -2117,7 +2144,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  		 * interaction. Update the power state in vfio driver to perform
>  		 * the logic needed for D0 power state.
>  		 */
> -		vfio_pci_set_power_state(cur, PCI_D0);
> +		vfio_pci_set_power_state_locked(cur, PCI_D0);
>  		if (cur == cur_mem)
>  			is_mem = false;
>  		if (cur == cur_vma)
> diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
> index 57d3b2cbbd8e..e97ba14c4aa0 100644
> --- a/drivers/vfio/pci/vfio_pci_rdwr.c
> +++ b/drivers/vfio/pci/vfio_pci_rdwr.c
> @@ -41,8 +41,13 @@
>  static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev,		\
>  			bool test_mem, u##size val, void __iomem *io)	\
>  {									\
> +	down_read(&vdev->memory_lock);					\
> +	if (vdev->power_state >= PCI_D3hot) {				\
> +		up_read(&vdev->memory_lock);				\
> +		return -EIO;						\
> +	}								\
> +									\

The reason that we only set test_mem for MMIO BARs is that systems are
generally more lenient about probing unresponsive I/O port space to
support legacy use cases.  Have you found cases where access to an I/O
port BAR when the device is either in D3hot+ or I/O port is disabled in
the command register triggers a system fault?  If not it seems we could
roll the power_state check into __vfio_pci_memory_enabled(), if so then
we probably need to improve our coverage of access to disabled I/O port
BARs beyond only the power_state check.  Thanks,

Alex

>  	if (test_mem) {							\
> -		down_read(&vdev->memory_lock);				\
>  		if (!__vfio_pci_memory_enabled(vdev)) {			\
>  			up_read(&vdev->memory_lock);			\
>  			return -EIO;					\
> @@ -51,8 +56,7 @@ static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev,		\
>  									\
>  	vfio_iowrite##size(val, io);					\
>  									\
> -	if (test_mem)							\
> -		up_read(&vdev->memory_lock);				\
> +	up_read(&vdev->memory_lock);					\
>  									\
>  	return 0;							\
>  }
> @@ -68,8 +72,13 @@ VFIO_IOWRITE(64)
>  static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev,		\
>  			bool test_mem, u##size *val, void __iomem *io)	\
>  {									\
> +	down_read(&vdev->memory_lock);					\
> +	if (vdev->power_state >= PCI_D3hot) {				\
> +		up_read(&vdev->memory_lock);				\
> +		return -EIO;						\
> +	}								\
> +									\
>  	if (test_mem) {							\
> -		down_read(&vdev->memory_lock);				\
>  		if (!__vfio_pci_memory_enabled(vdev)) {			\
>  			up_read(&vdev->memory_lock);			\
>  			return -EIO;					\
> @@ -78,8 +87,7 @@ static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev,		\
>  									\
>  	*val = vfio_ioread##size(io);					\
>  									\
> -	if (test_mem)							\
> -		up_read(&vdev->memory_lock);				\
> +	up_read(&vdev->memory_lock);					\
>  									\
>  	return 0;							\
>  }




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux