On 8 May 2018 at 10:05, <changbin.du@xxxxxxxxx> wrote: > From: Changbin Du <changbin.du@xxxxxxxxx> > > To support huge gtt, we need to support huge pages in kvmgt first. > This patch adds a 'size' param to the intel_gvt_mpt::dma_map_guest_page > API and implements it in kvmgt. > > v2: rebase. > > Signed-off-by: Changbin Du <changbin.du@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gvt/gtt.c | 6 +- > drivers/gpu/drm/i915/gvt/hypercall.h | 2 +- > drivers/gpu/drm/i915/gvt/kvmgt.c | 130 +++++++++++++++++++++++++---------- > drivers/gpu/drm/i915/gvt/mpt.h | 7 +- > 4 files changed, 101 insertions(+), 44 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c > index 2f13464..ffeecda 100644 > --- a/drivers/gpu/drm/i915/gvt/gtt.c > +++ b/drivers/gpu/drm/i915/gvt/gtt.c > @@ -1104,7 +1104,7 @@ static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, > > for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { > ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, > - start_gfn + i, &dma_addr); > + start_gfn + i, PAGE_SIZE, &dma_addr); > if (ret) > return ret; > > @@ -1150,7 +1150,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, > }; > > /* direct shadow */ > - ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr); > + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr); > if (ret) > return -ENXIO; > > @@ -2078,7 +2078,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, > } > > ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, > - &dma_addr); > + PAGE_SIZE, &dma_addr); > if (ret) { > gvt_vgpu_err("fail to populate guest ggtt entry\n"); > /* guest driver may read/write the entry when partial > diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h > index f6dd9f7..5af11cf 100644 > --- a/drivers/gpu/drm/i915/gvt/hypercall.h > +++ b/drivers/gpu/drm/i915/gvt/hypercall.h > @@ -53,7 +53,7 @@ struct intel_gvt_mpt { > unsigned long 
(*gfn_to_mfn)(unsigned long handle, unsigned long gfn); > > int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn, > - dma_addr_t *dma_addr); > + unsigned long size, dma_addr_t *dma_addr); > void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); > > int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, > diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c > index df4e4a0..4d2f53a 100644 > --- a/drivers/gpu/drm/i915/gvt/kvmgt.c > +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c > @@ -94,6 +94,7 @@ struct gvt_dma { > struct rb_node dma_addr_node; > gfn_t gfn; > dma_addr_t dma_addr; > + unsigned long size; > struct kref ref; > }; > > @@ -106,51 +107,103 @@ static int kvmgt_guest_init(struct mdev_device *mdev); > static void intel_vgpu_release_work(struct work_struct *work); > static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); > > -static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, > - dma_addr_t *dma_addr) > +static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, > + unsigned long size) > { > - struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; > - struct page *page; > - unsigned long pfn; > + int total_pages; > + int npage; > int ret; > > - /* Pin the page first. */ > - ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1, > - IOMMU_READ | IOMMU_WRITE, &pfn); > - if (ret != 1) { > - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", > - gfn, ret); > - return -EINVAL; > + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; > + > + for (npage = 0; npage < total_pages; npage++) { > + unsigned long cur_gfn = gfn + npage; > + > + ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1); > + WARN_ON(ret != 1); > } > +} > > - if (!pfn_valid(pfn)) { > - gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); > - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); > - return -EINVAL; > +/* Pin a normal or compound guest page for dma. 
*/ > +static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, > + unsigned long size, struct page **page) > +{ > + unsigned long base_pfn = 0; > + int total_pages; > + int npage; > + int ret; > + > + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; > + /* > + * We pin the pages one-by-one to avoid allocating a big arrary > + * on stack to hold pfns. > + */ > + for (npage = 0; npage < total_pages; npage++) { > + unsigned long cur_gfn = gfn + npage; > + unsigned long pfn; > + > + ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1, > + IOMMU_READ | IOMMU_WRITE, &pfn); > + if (ret != 1) { > + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", > + cur_gfn, ret); > + goto err; > + } > + > + if (!pfn_valid(pfn)) { > + gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); > + npage++; > + ret = -EFAULT; > + goto err; > + } > + > + if (npage == 0) > + base_pfn = pfn; > + else if (base_pfn + npage != pfn) { > + gvt_vgpu_err("The pages are not continuous\n"); > + ret = -EINVAL; > + npage++; > + goto err; > + } > } > > + *page = pfn_to_page(base_pfn); > + return 0; > +err: > + gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); > + return ret; > +} > + > +static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, > + dma_addr_t *dma_addr, unsigned long size) > +{ > + struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; > + struct page *page = NULL; > + int ret; > + > + ret = gvt_pin_guest_page(vgpu, gfn, size, &page); > + if (ret) > + return ret; > + > /* Setup DMA mapping. 
*/ > - page = pfn_to_page(pfn); > - *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, > - PCI_DMA_BIDIRECTIONAL); > - if (dma_mapping_error(dev, *dma_addr)) { > - gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn); > - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); > - return -ENOMEM; > + *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL); Do we not need to check whether the DMA address we get back is aligned to the requested page size, and, if it is not, fall back to splitting the 2M shadow entry? _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx