[PATCH v4] drm/amdgpu: Transfer fences to dmabuf importer

christian.koenig@xxxxxxx (Christian König) · Tue, 7 Aug 2018 19:57:16 +0200

Am 07.08.2018 um 18:08 schrieb Chris Wilson:
> amdgpu only uses shared-fences internally, but dmabuf importers rely on
> implicit write hazard tracking via the reservation_object.fence_excl.
> For example, the importer use the write hazard for timing a page flip to
> only occur after the exporter has finished flushing its write into the
> surface. As such, on exporting a dmabuf, we must either flush all
> outstanding fences (for we do not know which are writes and should have
> been exclusive) or alternatively create a new exclusive fence that is
> the composite of all the existing shared fences, and so will only be
> signaled when all earlier fences are signaled (ensuring that we can not
> be signaled before the completion of any earlier write).
>
> v2: reservation_object is already locked by amdgpu_bo_reserve()
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107341
> Testcase: igt/amd_prime/amd-to-i915
> References: 8e94a46c1770 ("drm/amdgpu: Attach exclusive fence to prime exported bo's. (v5)")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: "Christian KÃ¶nig" <christian.koenig at amd.com>
> ---
>
> This time, hopefully proofread and references complete.
> -Chris
>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 68 ++++++++++++++++++++---
>   1 file changed, 60 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index 1c5d97f4b4dd..dff2b01a3d89 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -37,6 +37,7 @@
>   #include "amdgpu_display.h"
>   #include <drm/amdgpu_drm.h>
>   #include <linux/dma-buf.h>
> +#include <linux/dma-fence-array.h>
>   
>   static const struct dma_buf_ops amdgpu_dmabuf_ops;
>   
> @@ -188,6 +189,57 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>   	return ERR_PTR(ret);
>   }
>   
> +static int
> +__reservation_object_make_exclusive(struct reservation_object *obj)
> +{
> +	struct reservation_object_list *fobj;
> +	struct dma_fence_array *array;
> +	struct dma_fence **fences;
> +	unsigned int count, i;
> +
> +	fobj = reservation_object_get_list(obj);
> +	if (!fobj)
> +		return 0;
> +
> +	count = !!rcu_access_pointer(obj->fence_excl);
> +	count += fobj->shared_count;
> +
> +	fences = kmalloc_array(count, sizeof(*fences), GFP_KERNEL);
> +	if (!fences)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < fobj->shared_count; i++) {
> +		struct dma_fence *f =
> +			rcu_dereference_protected(fobj->shared[i],
> +						  reservation_object_held(obj));
> +
> +		fences[i] = dma_fence_get(f);
> +	}
> +
> +	if (rcu_access_pointer(obj->fence_excl)) {
> +		struct dma_fence *f =
> +			rcu_dereference_protected(obj->fence_excl,
> +						  reservation_object_held(obj));
> +
> +		fences[i] = dma_fence_get(f);
> +	}
> +
> +	array = dma_fence_array_create(count, fences,
> +				       dma_fence_context_alloc(1), 0,
> +				       false);
> +	if (!array)
> +		goto err_fences_put;
> +
> +	reservation_object_add_excl_fence(obj, &array->base);
> +	return 0;
> +
> +err_fences_put:
> +	for (i = 0; i < count; i++)
> +		dma_fence_put(fences[i]);
> +	kfree(fences);
> +	return -ENOMEM;
> +}
> +

This can be simplified a lot. See amdgpu_pasid_free_delayed() for an 
example:
> Â Â Â Â Â Â Â  r = reservation_object_get_fences_rcu(resv, NULL, &count, 
> &fences);
> Â Â Â Â Â Â Â  if (r)
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  goto fallback;
>
> Â Â Â Â Â Â Â  if (count == 0) {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  amdgpu_pasid_free(pasid);
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return;
> Â Â Â Â Â Â Â  }
>
> Â Â Â Â Â Â Â  if (count == 1) {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  fence = fences[0];
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  kfree(fences);
> Â Â Â Â Â Â Â  } else {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  uint64_t context = dma_fence_context_alloc(1);
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  struct dma_fence_array *array;
>
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  array = dma_fence_array_create(count, fences, context,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  1, false);
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (!array) {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  kfree(fences);
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  goto fallback;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  }
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  fence = &array->base;
> Â Â Â Â Â Â Â  }

Regards,
Christian.

>   /**
>    * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
>    * @dma_buf: shared DMA buffer
> @@ -219,16 +271,16 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
>   
>   	if (attach->dev->driver != adev->dev->driver) {
>   		/*
> -		 * Wait for all shared fences to complete before we switch to future
> -		 * use of exclusive fence on this prime shared bo.
> +		 * We only create shared fences for internal use, but importers
> +		 * of the dmabuf rely on exclusive fences for implicitly
> +		 * tracking write hazards. As any of the current fences may
> +		 * correspond to a write, we need to convert all existing
> +		 * fences on the reservation object into a single exclusive
> +		 * fence.
>   		 */
> -		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
> -							true, false,
> -							MAX_SCHEDULE_TIMEOUT);
> -		if (unlikely(r < 0)) {
> -			DRM_DEBUG_PRIME("Fence wait failed: %li\n", r);
> +		r = __reservation_object_make_exclusive(bo->tbo.resv);
> +		if (r)
>   			goto error_unreserve;
> -		}
>   	}
>   
>   	/* pin buffer into GTT */