Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 28.01.19 um 22:52 schrieb Marek Olšák:
From: Marek Olšák <marek.olsak@xxxxxxx>

Normal syncobjs signal when an IB finishes. Start syncobjs signal when
an IB starts.

That approach has quite a number of problems (for example, you can't allocate memory at this point).

It would be better to instead add a flag indicating that, for a given dependency/syncobj, we should only sync on scheduling.

Christian.


Signed-off-by: Marek Olšák <marek.olsak@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 18 ++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
  include/uapi/drm/amdgpu_drm.h           | 13 ++++++++++++-
  4 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d67f8b1dfe80..8e2f7e558bc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
  	struct dma_fence		*fence;
  	uint64_t			bytes_moved_threshold;
  	uint64_t			bytes_moved_vis_threshold;
  	uint64_t			bytes_moved;
  	uint64_t			bytes_moved_vis;
  	struct amdgpu_bo_list_entry	*evictable;
/* user fence */
  	struct amdgpu_bo_list_entry	uf_entry;
+ bool get_start_syncobj;
  	unsigned num_post_dep_syncobjs;
  	struct drm_syncobj **post_dep_syncobjs;
  };
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
  				      uint32_t ib_idx, int idx)
  {
  	return p->job->ibs[ib_idx].ptr[idx];
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1c49b8266d69..917f3818c61c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
  		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
  					  chunk_ib->ip_instance, chunk_ib->ring,
  					  &entity);
  		if (r)
  			return r;
if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
  			parser->job->preamble_status |=
  				AMDGPU_PREAMBLE_IB_PRESENT;
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
+			parser->get_start_syncobj = true;
+
  		if (parser->entity && parser->entity != entity)
  			return -EINVAL;
parser->entity = entity; ring = to_amdgpu_ring(entity->rq->sched);
  		r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
  				   chunk_ib->ib_bytes : 0, ib);
  		if (r) {
  			DRM_ERROR("Failed to get ib !\n");
@@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
  	amdgpu_mn_lock(p->mn);
  	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
  		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
  			r = -ERESTARTSYS;
  			goto error_abort;
  		}
  	}
+ if (p->get_start_syncobj) {
+		struct drm_syncobj *syncobj;
+
+		r = drm_syncobj_create(&syncobj, 0,
+				       &job->base.s_fence->scheduled);
+		if (r)
+			goto error_abort;
+
+		r = drm_syncobj_get_handle(p->filp, syncobj,
+					   &cs->out.start_syncobj);
+		if (r)
+			goto error_abort;
+		drm_syncobj_put(syncobj);
+	}
+
  	job->owner = p->filp;
  	p->fence = dma_fence_get(&job->base.s_fence->finished);
amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
  	amdgpu_cs_post_dependencies(p);
if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
  	    !p->ctx->preamble_presented) {
  		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
  		p->ctx->preamble_presented = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c806f984bcc5..a230a30722d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -64,23 +64,24 @@
   * - 3.18.0 - Export gpu always on cu bitmap
   * - 3.19.0 - Add support for UVD MJPEG decode
   * - 3.20.0 - Add support for local BOs
   * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
   * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
   * - 3.23.0 - Add query for VRAM lost counter
   * - 3.24.0 - Add high priority compute support for gfx9
   * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
   * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
   * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ
   */
  #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	27
+#define KMS_DRIVER_MINOR	28
  #define KMS_DRIVER_PATCHLEVEL	0
int amdgpu_vram_limit = 0;
  int amdgpu_vis_vram_limit = 0;
  int amdgpu_gart_size = -1; /* auto */
  int amdgpu_gtt_size = -1; /* auto */
  int amdgpu_moverate = -1; /* auto */
  int amdgpu_benchmarking = 0;
  int amdgpu_testing = 0;
  int amdgpu_audio = -1;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 662d379ea624..d0e0c99cea32 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in {
  	__u32		ctx_id;
  	/**  Handle of resource list associated with CS */
  	__u32		bo_list_handle;
  	__u32		num_chunks;
  	__u32		_pad;
  	/** this points to __u64 * which point to cs chunks */
  	__u64		chunks;
  };
struct drm_amdgpu_cs_out {
-	__u64 handle;
+	__u64 handle; /* sequence number */
+	__u32 start_syncobj; /* signalled when IB execution begins */
+	__u32 _pad;
  };
union drm_amdgpu_cs {
  	struct drm_amdgpu_cs_in in;
  	struct drm_amdgpu_cs_out out;
  };
/* Specify flags to be used for IB */ /* This IB should be submitted to CE */
@@ -566,20 +568,29 @@ union drm_amdgpu_cs {
/* The IB fence should do the L2 writeback but not invalidate any shader
   * caches (L2/vL1/sL1/I$). */
  #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
   * This will reset wave ID counters for the IB.
   */
  #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+/* The CS ioctl will return a syncobj representing when all IBs begin
+ * execution. If set, this applies to all IBs. The returned syncobj can be
+ * used as an IB dependency for other IBs.
+ *
+ * This is used for GPU deadlock prevention when userspace uses mid-IB fences
+ * to wait for mid-IB work on other rings.
+ */
+#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5)
+
  struct drm_amdgpu_cs_chunk_ib {
  	__u32 _pad;
  	/** AMDGPU_IB_FLAG_* */
  	__u32 flags;
  	/** Virtual address to begin IB execution */
  	__u64 va_start;
  	/** Size of submission */
  	__u32 ib_bytes;
  	/** HW IP to submit to */
  	__u32 ip_type;

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux