Re: [PATCH v2] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, May 11, 2016 at 07:34:19PM +0800, Yongji Xie wrote:
> Current vfio-pci implementation disallows to mmap
> sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
> page may be shared with other BARs. This will cause some
> performance issues when we passthrough a PCI device with
> this kind of BARs. Guest will not be able to handle the mmio
> accesses to the BARs which leads to mmio emulations in host.
> 
> However, not all sub-page BARs will share page with other BARs.
> We should allow to mmap the sub-page MMIO BARs which we can
> make sure will not share page with other BARs.
> 
> This patch adds support for this case. And we try to add some
> dummy resources to reserve the remainder of the page which
> hot-add device's BAR might be assigned into.

This is starting to look more reasonable from a safety perspective.
At least I don't have an allergic reaction to mapping a page that may
contain BARs from other random devices :)

> Signed-off-by: Yongji Xie <xyjxie@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/vfio/pci/vfio_pci.c         |   85 ++++++++++++++++++++++++++++++++---
>  drivers/vfio/pci/vfio_pci_private.h |    8 ++++
>  2 files changed, 86 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index 98059df..33282b8 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -110,16 +110,77 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
>  	return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
>  }
>  
> +static bool vfio_pci_bar_mmap_supported(struct vfio_pci_device *vdev, int index)
> +{
> +	struct resource *res = vdev->pdev->resource + index;
> +	struct vfio_pci_dummy_resource *dummy_res1 = NULL;
> +	struct vfio_pci_dummy_resource *dummy_res2 = NULL;
> +
> +	if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && res->flags & IORESOURCE_MEM &&
> +		resource_size(res) > 0) {
> +		if (resource_size(res) >= PAGE_SIZE)
> +			return true;
> +
> +		if ((res->start & ~PAGE_MASK)) {
> +			/*
> +			 * Add a dummy resource to reserve the portion
> +			 * before res->start in exclusive page in case
> +			 * that hot-add device's bar is assigned into it.
> +			 */
> +			dummy_res1 = kzalloc(sizeof(*dummy_res1), GFP_KERNEL);

Should check for kzalloc() failure here.

> +			dummy_res1->resource.start = res->start & PAGE_MASK;
> +			dummy_res1->resource.end = res->start - 1;
> +			dummy_res1->resource.flags = res->flags;
> +			if (request_resource(res->parent,
> +						&dummy_res1->resource)) {
> +				kfree(dummy_res1);
> +				return false;
> +			}
> +			dummy_res1->index = index;
> +			list_add(&dummy_res1->res_next,
> +					&vdev->dummy_resources_list);

The main body of this function is unnecessarily indented.  If you did
this it would be less indented:

  if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
    return false;

  if (!(res->flags & IORESOURCE_MEM))
    return false;

  /*
   * Not sure this is necessary; the PCI core *shouldn't* set up a
   * resource with a type but zero size.  But there may be bugs that
   * cause us to do that.
   */
  if (!resource_size(res))
    return false;

  if (resource_size(res) >= PAGE_SIZE)
    return true;

  if ((res->start & ~PAGE_MASK)) {
    ...

> +		}
> +		if (((res->end + 1) & ~PAGE_MASK)) {
> +			/*
> +			 * Add a dummy resource to reserve the portion
> +			 * after res->end.
> +			 */
> +			dummy_res2 = kzalloc(sizeof(*dummy_res2), GFP_KERNEL);
> +			dummy_res2->resource.start = res->end + 1;
> +			dummy_res2->resource.end = (res->start & PAGE_MASK) +
> +							PAGE_SIZE - 1;
> +			dummy_res2->resource.flags = res->flags;
> +			if (request_resource(res->parent,
> +						&dummy_res2->resource)) {
> +				if (dummy_res1) {
> +					list_del(&dummy_res1->res_next);
> +					release_resource(&dummy_res1->resource);
> +					kfree(dummy_res1);
> +				}
> +				kfree(dummy_res2);
> +				return false;
> +			}
> +			dummy_res2->index = index;
> +			list_add(&dummy_res2->res_next,
> +					&vdev->dummy_resources_list);
> +		}
> +		return true;
> +	}
> +	return false;
> +}
> +
>  static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
>  static void vfio_pci_disable(struct vfio_pci_device *vdev);
>  
>  static int vfio_pci_enable(struct vfio_pci_device *vdev)
>  {
>  	struct pci_dev *pdev = vdev->pdev;
> -	int ret;
> +	int ret, bar;
>  	u16 cmd;
>  	u8 msix_pos;
>  
> +	INIT_LIST_HEAD(&vdev->dummy_resources_list);
> +
>  	pci_set_power_state(pdev, PCI_D0);
>  
>  	/* Don't allow our initial saved state to include busmaster */
> @@ -183,12 +244,17 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
>  		}
>  	}
>  
> +	for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
> +		vdev->bar_mmap_supported[bar] =
> +				vfio_pci_bar_mmap_supported(vdev, bar);
> +	}
>  	return 0;
>  }
>  
>  static void vfio_pci_disable(struct vfio_pci_device *vdev)
>  {
>  	struct pci_dev *pdev = vdev->pdev;
> +	struct vfio_pci_dummy_resource *dummy_res, *tmp;
>  	int i, bar;
>  
>  	/* Stop the device from further DMA */
> @@ -217,6 +283,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
>  		vdev->barmap[bar] = NULL;
>  	}
>  
> +	list_for_each_entry_safe(dummy_res, tmp,
> +				 &vdev->dummy_resources_list, res_next) {
> +		list_del(&dummy_res->res_next);
> +		release_resource(&dummy_res->resource);
> +		kfree(dummy_res);
> +	}
> +
>  	vdev->needs_reset = true;
>  
>  	/*
> @@ -587,9 +660,7 @@ static long vfio_pci_ioctl(void *device_data,
>  
>  			info.flags = VFIO_REGION_INFO_FLAG_READ |
>  				     VFIO_REGION_INFO_FLAG_WRITE;
> -			if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
> -			    pci_resource_flags(pdev, info.index) &
> -			    IORESOURCE_MEM && info.size >= PAGE_SIZE) {
> +			if (vdev->bar_mmap_supported[info.index]) {
>  				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
>  				if (info.index == vdev->msix_bar) {
>  					ret = msix_sparse_mmap_cap(vdev, &caps);
> @@ -1011,16 +1082,16 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
>  		return -EINVAL;
>  	if (index >= VFIO_PCI_ROM_REGION_INDEX)
>  		return -EINVAL;
> -	if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM))
> +	if (!vdev->bar_mmap_supported[index])
>  		return -EINVAL;
>  
> -	phys_len = pci_resource_len(pdev, index);
> +	phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
>  	req_len = vma->vm_end - vma->vm_start;
>  	pgoff = vma->vm_pgoff &
>  		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
>  	req_start = pgoff << PAGE_SHIFT;
>  
> -	if (phys_len < PAGE_SIZE || req_start + req_len > phys_len)
> +	if (req_start + req_len > phys_len)
>  		return -EINVAL;
>  
>  	if (index == vdev->msix_bar) {
> diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
> index 8a7d546..a4233a8 100644
> --- a/drivers/vfio/pci/vfio_pci_private.h
> +++ b/drivers/vfio/pci/vfio_pci_private.h
> @@ -57,9 +57,16 @@ struct vfio_pci_region {
>  	u32				flags;
>  };
>  
> +struct vfio_pci_dummy_resource {
> +	struct resource		resource;
> +	int			index;
> +	struct list_head	res_next;
> +};
> +
>  struct vfio_pci_device {
>  	struct pci_dev		*pdev;
>  	void __iomem		*barmap[PCI_STD_RESOURCE_END + 1];
> +	bool			bar_mmap_supported[PCI_STD_RESOURCE_END + 1];
>  	u8			*pci_config_map;
>  	u8			*vconfig;
>  	struct perm_bits	*msi_perm;
> @@ -87,6 +94,7 @@ struct vfio_pci_device {
>  	int			refcnt;
>  	struct eventfd_ctx	*err_trigger;
>  	struct eventfd_ctx	*req_trigger;
> +	struct list_head	dummy_resources_list;
>  };
>  
>  #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
> -- 
> 1.7.9.5
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux