Re: [PATCH 5/5] drm/amdgpu: add independent DMA-buf export v3

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jun 22, 2018 at 03:39:25PM +0200, Christian König wrote:
> Am 18.06.2018 um 10:28 schrieb Daniel Vetter:
> > On Fri, Jun 01, 2018 at 02:00:20PM +0200, Christian König wrote:
> > > The caching of SGT's done by the DRM code is actually quite harmful and
> > > should probably be removed altogether in the long term.
> > Hm, why is it harmful? We've done it because it's expensive, and people
> > started screaming about the overhead ... hence the caching.
> 
> It's a bad idea because it artificially extends the lifetime of the mapping,
> which is exactly what we want to avoid when we create the mappings
> dynamically and only as long as necessary.
> 
> And actually checking the mail history I could only find indicators that
> people thought it would help. I haven't found any evidence that anybody
> actually checked if that helps or not.

Hm, I do have some memories of people being really upset about the
constant rewriting of IOMMU pagetables. This was for display-only
drivers where forcing everyone to have their own private caching
implementation is a bit silly.

> > Doing an amdgpu copypasta seems like working around issues in shared code.
> 
> Completely agree, but I don't want to push that policy to everybody just
> yet.
> 
> When we find that this is really the right direction I can convert Radeon as
> well, move more component functionality logic into common code and when
> that's done rip out this middle layer because it just becomes superfluous.

I expect upset people if we remove the mapping caching. Maybe we should
change it to not cache the mapping if the exporter has the dynamic
unbinding stuff supported? Or maybe even when both exporter and importer
have it supported.

Wrt the midlayer: That's because nvidia blob + EXPORT_SYMBOL_GPL :-)

Cheers, Daniel


> 
> Christian.
> 
> > -Daniel
> > 
> > > Start by providing a separate DMA-buf export implementation in amdgpu. This is
> > > also a prerequisite of unpinned DMA-buf handling.
> > > 
> > > v2: fix unintended recursion, remove debugging leftovers
> > > v3: split out from unpinned DMA-buf work
> > > 
> > > Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  1 -
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  1 -
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 73 ++++++++++++++-----------------
> > >   3 files changed, 32 insertions(+), 43 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > index 2d7500921c0b..93dc57d74fc2 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > @@ -373,7 +373,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
> > >   void amdgpu_gem_object_close(struct drm_gem_object *obj,
> > >   				struct drm_file *file_priv);
> > >   unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
> > > -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
> > >   struct drm_gem_object *
> > >   amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> > >   				 struct dma_buf_attachment *attach,
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > > index b0bf2f24da48..270b8ad927ea 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > > @@ -907,7 +907,6 @@ static struct drm_driver kms_driver = {
> > >   	.gem_prime_export = amdgpu_gem_prime_export,
> > >   	.gem_prime_import = amdgpu_gem_prime_import,
> > >   	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
> > > -	.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
> > >   	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
> > >   	.gem_prime_vmap = amdgpu_gem_prime_vmap,
> > >   	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > > index a156b3891a3f..0c5a75b06648 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > > @@ -32,14 +32,6 @@
> > >   static const struct dma_buf_ops amdgpu_dmabuf_ops;
> > > -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
> > > -{
> > > -	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> > > -	int npages = bo->tbo.num_pages;
> > > -
> > > -	return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
> > > -}
> > > -
> > >   void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
> > >   {
> > >   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> > > @@ -132,23 +124,17 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> > >   	return ERR_PTR(ret);
> > >   }
> > > -static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
> > > -				 struct dma_buf_attachment *attach)
> > > +static struct sg_table *
> > > +amdgpu_gem_map_dma_buf(struct dma_buf_attachment *attach,
> > > +		       enum dma_data_direction dir)
> > >   {
> > > +	struct dma_buf *dma_buf = attach->dmabuf;
> > >   	struct drm_gem_object *obj = dma_buf->priv;
> > >   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> > >   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > > +	struct sg_table *sgt;
> > >   	long r;
> > > -	r = drm_gem_map_attach(dma_buf, attach);
> > > -	if (r)
> > > -		return r;
> > > -
> > > -	r = amdgpu_bo_reserve(bo, false);
> > > -	if (unlikely(r != 0))
> > > -		goto error_detach;
> > > -
> > > -
> > >   	if (attach->dev->driver != adev->dev->driver) {
> > >   		/*
> > >   		 * Wait for all shared fences to complete before we switch to future
> > > @@ -159,46 +145,53 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
> > >   							MAX_SCHEDULE_TIMEOUT);
> > >   		if (unlikely(r < 0)) {
> > >   			DRM_DEBUG_PRIME("Fence wait failed: %li\n", r);
> > > -			goto error_unreserve;
> > > +			return ERR_PTR(r);
> > >   		}
> > >   	}
> > >   	/* pin buffer into GTT */
> > >   	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
> > >   	if (r)
> > > -		goto error_unreserve;
> > > +		return ERR_PTR(r);
> > > +
> > > +	sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
> > > +	if (IS_ERR(sgt))
> > > +		return sgt;
> > > +
> > > +	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
> > > +			      DMA_ATTR_SKIP_CPU_SYNC))
> > > +		goto error_free;
> > >   	if (attach->dev->driver != adev->dev->driver)
> > >   		bo->prime_shared_count++;
> > > -error_unreserve:
> > > -	amdgpu_bo_unreserve(bo);
> > > +	return sgt;
> > > -error_detach:
> > > -	if (r)
> > > -		drm_gem_map_detach(dma_buf, attach);
> > > -	return r;
> > > +error_free:
> > > +	sg_free_table(sgt);
> > > +	kfree(sgt);
> > > +	return ERR_PTR(-ENOMEM);
> > >   }
> > > -static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
> > > -				  struct dma_buf_attachment *attach)
> > > +static void amdgpu_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
> > > +				     struct sg_table *sgt,
> > > +				     enum dma_data_direction dir)
> > >   {
> > > +	struct dma_buf *dma_buf = attach->dmabuf;
> > >   	struct drm_gem_object *obj = dma_buf->priv;
> > >   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> > >   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > > -	int ret = 0;
> > > -
> > > -	ret = amdgpu_bo_reserve(bo, true);
> > > -	if (unlikely(ret != 0))
> > > -		goto error;
> > >   	amdgpu_bo_unpin(bo);
> > > +
> > >   	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
> > >   		bo->prime_shared_count--;
> > > -	amdgpu_bo_unreserve(bo);
> > > -error:
> > > -	drm_gem_map_detach(dma_buf, attach);
> > > +	if (sgt) {
> > > +		dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
> > > +		sg_free_table(sgt);
> > > +		kfree(sgt);
> > > +	}
> > >   }
> > >   struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> > > @@ -237,10 +230,8 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
> > >   }
> > >   static const struct dma_buf_ops amdgpu_dmabuf_ops = {
> > > -	.attach = amdgpu_gem_map_attach,
> > > -	.detach = amdgpu_gem_map_detach,
> > > -	.map_dma_buf = drm_gem_map_dma_buf,
> > > -	.unmap_dma_buf = drm_gem_unmap_dma_buf,
> > > +	.map_dma_buf = amdgpu_gem_map_dma_buf,
> > > +	.unmap_dma_buf = amdgpu_gem_unmap_dma_buf,
> > >   	.release = drm_gem_dmabuf_release,
> > >   	.begin_cpu_access = amdgpu_gem_begin_cpu_access,
> > >   	.map = drm_gem_dmabuf_kmap,
> > > -- 
> > > 2.14.1
> > > 
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@xxxxxxxxxxxxxxxxxxxxx
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel




[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux