Re: [PATCH v4] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive

On Thu, 23 Jun 2016 10:39:30 +0800
Yongji Xie <xyjxie@xxxxxxxxxxxxxxxxxx> wrote:

> Hi, Alex
> 
> On 2016/6/23 6:04, Alex Williamson wrote:
> 
> > On Mon, 30 May 2016 21:06:37 +0800
> > Yongji Xie <xyjxie@xxxxxxxxxxxxxxxxxx> wrote:
> >  
> >> The current vfio-pci implementation disallows mmap of
> >> sub-page (size < PAGE_SIZE) MMIO BARs because the MMIO page
> >> of such a BAR may be shared with other BARs. This hurts
> >> performance when we pass through a PCI device with this kind
> >> of BAR: the guest cannot map the BAR directly, so every MMIO
> >> access to it is trapped and emulated in the host.
> >>
> >> However, not all sub-page BARs share a page with other BARs.
> >> We should allow mmap of a sub-page MMIO BAR when we can
> >> guarantee that its page is not shared with any other BAR.
> >>
> >> This patch adds support for that case, adding a dummy
> >> resource to reserve the remainder of the page so that a
> >> hot-added device's BAR cannot be assigned into it. The case
> >> where the BAR is not page aligned is not handled, because we
> >> cannot expect the BAR to be assigned to the same offset
> >> within a page in the guest when we pass it through, and such
> >> a BAR would be hard to access from userspace since we have
> >> no way to report its offset within the page.
> >>
> >> Signed-off-by: Yongji Xie <xyjxie@xxxxxxxxxxxxxxxxxx>
> >> ---  
> > Hi Yongji,
> >
> > On 5/22, message-id
> > <201605230345.u4N3dJip043323@xxxxxxxxxxxxxxxxxxxxxxxxxx>, you indicated
> > you'd post the QEMU code that this patch enables "soon".  Have I
> > missed it?  I'm still waiting to see it.  Thanks,
> >
> > Alex  
> 
> I posted it on May 24th [1]. Do I need to resend it?
> 
> [1] http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg04125.html

I found it.  Thanks,

Alex
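
For readers following along, here is a minimal userspace sketch
(not the actual QEMU patch linked above, just an illustration) of
how a VFIO user could take advantage of this change: query the
region info and, if VFIO_REGION_INFO_FLAG_MMAP is now set for a
page-aligned sub-page BAR, mmap it directly instead of falling
back to read/write emulation.  The helper name mmap_bar() and its
device_fd/bar parameters are made up for the example.

#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

/* Illustrative helper: map BAR "bar" of an already-open VFIO device fd. */
static void *mmap_bar(int device_fd, int bar)
{
        struct vfio_region_info info = {
                .argsz = sizeof(info),
                .index = bar,
        };
        void *map;

        if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info) < 0)
                return NULL;

        /*
         * Previously the kernel set this flag only for MMIO BARs of at
         * least PAGE_SIZE; with this patch a page-aligned sub-page BAR
         * that owns its page exclusively may advertise it as well.
         */
        if (!(info.flags & VFIO_REGION_INFO_FLAG_MMAP))
                return NULL;

        /*
         * mmap() rounds the length up to a page, and the kernel side
         * now accepts requests up to PAGE_ALIGN(BAR size), so this
         * works for a sub-page BAR too.
         */
        map = mmap(NULL, info.size, PROT_READ | PROT_WRITE, MAP_SHARED,
                   device_fd, info.offset);

        return map == MAP_FAILED ? NULL : map;
}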

> >>   drivers/vfio/pci/vfio_pci.c         |   87 ++++++++++++++++++++++++++++++++---
> >>   drivers/vfio/pci/vfio_pci_private.h |    8 ++++
> >>   2 files changed, 89 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> >> index 188b1ff..3cca2a7 100644
> >> --- a/drivers/vfio/pci/vfio_pci.c
> >> +++ b/drivers/vfio/pci/vfio_pci.c
> >> @@ -110,6 +110,73 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
> >>   	return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
> >>   }
> >>   
> >> +static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
> >> +{
> >> +	struct resource *res;
> >> +	int bar;
> >> +	struct vfio_pci_dummy_resource *dummy_res;
> >> +
> >> +	INIT_LIST_HEAD(&vdev->dummy_resources_list);
> >> +
> >> +	for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
> >> +		res = vdev->pdev->resource + bar;
> >> +
> >> +		if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
> >> +			goto no_mmap;
> >> +
> >> +		if (!(res->flags & IORESOURCE_MEM))
> >> +			goto no_mmap;
> >> +
> >> +		/*
> >> +		 * The PCI core shouldn't set up a resource with a
> >> +		 * type but zero size. But there may be bugs that
> >> +		 * cause us to do that.
> >> +		 */
> >> +		if (!resource_size(res))
> >> +			goto no_mmap;
> >> +
> >> +		if (resource_size(res) >= PAGE_SIZE) {
> >> +			vdev->bar_mmap_supported[bar] = true;
> >> +			continue;
> >> +		}
> >> +
> >> +		if (!(res->start & ~PAGE_MASK)) {
> >> +			/*
> >> +			 * Add a dummy resource to reserve the remainder
> >> +			 * of the exclusive page in case that hot-add
> >> +			 * device's bar is assigned into it.
> >> +			 */
> >> +			dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
> >> +			if (dummy_res == NULL)
> >> +				goto no_mmap;
> >> +
> >> +			dummy_res->resource.start = res->end + 1;
> >> +			dummy_res->resource.end = res->start + PAGE_SIZE - 1;
> >> +			dummy_res->resource.flags = res->flags;
> >> +			if (request_resource(res->parent,
> >> +						&dummy_res->resource)) {
> >> +				kfree(dummy_res);
> >> +				goto no_mmap;
> >> +			}
> >> +			dummy_res->index = bar;
> >> +			list_add(&dummy_res->res_next,
> >> +					&vdev->dummy_resources_list);
> >> +			vdev->bar_mmap_supported[bar] = true;
> >> +			continue;
> >> +		}
> >> +		/*
> >> +		 * Here we don't handle the case when the BAR is not page
> >> +		 * aligned because we can't expect the BAR will be
> >> +		 * assigned into the same location in a page in guest
> >> +		 * when we passthrough the BAR. And it's hard to access
> >> +		 * this BAR in userspace because we have no way to get
> >> +		 * the BAR's location in a page.
> >> +		 */
> >> +no_mmap:
> >> +		vdev->bar_mmap_supported[bar] = false;
> >> +	}
> >> +}
> >> +
> >>   static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
> >>   static void vfio_pci_disable(struct vfio_pci_device *vdev);
> >>   
> >> @@ -218,12 +285,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
> >>   		}
> >>   	}
> >>   
> >> +	vfio_pci_probe_mmaps(vdev);
> >> +
> >>   	return 0;
> >>   }
> >>   
> >>   static void vfio_pci_disable(struct vfio_pci_device *vdev)
> >>   {
> >>   	struct pci_dev *pdev = vdev->pdev;
> >> +	struct vfio_pci_dummy_resource *dummy_res, *tmp;
> >>   	int i, bar;
> >>   
> >>   	/* Stop the device from further DMA */
> >> @@ -252,6 +322,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
> >>   		vdev->barmap[bar] = NULL;
> >>   	}
> >>   
> >> +	list_for_each_entry_safe(dummy_res, tmp,
> >> +				 &vdev->dummy_resources_list, res_next) {
> >> +		list_del(&dummy_res->res_next);
> >> +		release_resource(&dummy_res->resource);
> >> +		kfree(dummy_res);
> >> +	}
> >> +
> >>   	vdev->needs_reset = true;
> >>   
> >>   	/*
> >> @@ -623,9 +700,7 @@ static long vfio_pci_ioctl(void *device_data,
> >>   
> >>   			info.flags = VFIO_REGION_INFO_FLAG_READ |
> >>   				     VFIO_REGION_INFO_FLAG_WRITE;
> >> -			if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
> >> -			    pci_resource_flags(pdev, info.index) &
> >> -			    IORESOURCE_MEM && info.size >= PAGE_SIZE) {
> >> +			if (vdev->bar_mmap_supported[info.index]) {
> >>   				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
> >>   				if (info.index == vdev->msix_bar) {
> >>   					ret = msix_sparse_mmap_cap(vdev, &caps);
> >> @@ -1049,16 +1124,16 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
> >>   		return -EINVAL;
> >>   	if (index >= VFIO_PCI_ROM_REGION_INDEX)
> >>   		return -EINVAL;
> >> -	if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM))
> >> +	if (!vdev->bar_mmap_supported[index])
> >>   		return -EINVAL;
> >>   
> >> -	phys_len = pci_resource_len(pdev, index);
> >> +	phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
> >>   	req_len = vma->vm_end - vma->vm_start;
> >>   	pgoff = vma->vm_pgoff &
> >>   		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
> >>   	req_start = pgoff << PAGE_SHIFT;
> >>   
> >> -	if (phys_len < PAGE_SIZE || req_start + req_len > phys_len)
> >> +	if (req_start + req_len > phys_len)
> >>   		return -EINVAL;
> >>   
> >>   	if (index == vdev->msix_bar) {
> >> diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
> >> index 016c14a..2128de8 100644
> >> --- a/drivers/vfio/pci/vfio_pci_private.h
> >> +++ b/drivers/vfio/pci/vfio_pci_private.h
> >> @@ -57,9 +57,16 @@ struct vfio_pci_region {
> >>   	u32				flags;
> >>   };
> >>   
> >> +struct vfio_pci_dummy_resource {
> >> +	struct resource		resource;
> >> +	int			index;
> >> +	struct list_head	res_next;
> >> +};
> >> +
> >>   struct vfio_pci_device {
> >>   	struct pci_dev		*pdev;
> >>   	void __iomem		*barmap[PCI_STD_RESOURCE_END + 1];
> >> +	bool			bar_mmap_supported[PCI_STD_RESOURCE_END + 1];
> >>   	u8			*pci_config_map;
> >>   	u8			*vconfig;
> >>   	struct perm_bits	*msi_perm;
> >> @@ -88,6 +95,7 @@ struct vfio_pci_device {
> >>   	int			refcnt;
> >>   	struct eventfd_ctx	*err_trigger;
> >>   	struct eventfd_ctx	*req_trigger;
> >> +	struct list_head	dummy_resources_list;
> >>   };
> >>   
> >>   #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)  
> 
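
As a concrete illustration of the dummy-resource reservation above
(hypothetical numbers): with 64K pages, a 256-byte MMIO BAR assigned
at 0xd0000000 spans 0xd0000000-0xd00000ff and is page aligned, so
vfio_pci_probe_mmaps() marks it mmap-capable and requests a dummy
resource covering res->end + 1 = 0xd0000100 through
res->start + PAGE_SIZE - 1 = 0xd000ffff.  While the device is open,
no hot-added device's BAR can be assigned into the remainder of that
page, so the page stays exclusive; the dummy resources are released
again in vfio_pci_disable().  Had the same BAR been assigned at
0xd0000100 instead, it would not be page aligned and would simply
take the no_mmap path.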
