Hi Christian & Emily,

This v3 version looks pretty good to me, but some parts still need improvement, e.g.:

1) entity->finished doesn't convey what it really means; better to rename it to entity->last_scheduled.

2) drm_sched_entity_fini_job_cb() is better renamed to drm_sched_entity_kill_jobs_cb().

3) No need to pass "entity->finished" (line 275 of gpu_scheduler.c) to drm_sched_entity_fini_job_cb() when dma_fence_add_callback() returns -ENOENT, since that parameter is not used at all in the callback routine (see the sketch below).

4) Better to change the type of entity->fini_status to "int" instead of "uint32_t"; it should be aligned with the return type of wait_event_killable().

5) In:

+       if (entity->finished) {
+               dma_fence_put(entity->finished);
+               entity->finished = NULL;
        }

there is no need to check entity->finished, because dma_fence_put() already does that check internally (quoted below). The same applies here in job_recovery:

+
+               if (s_job->entity->finished)
+                       dma_fence_put(s_job->entity->finished);

and here in sched_main:

+               if (entity->finished)
+                       dma_fence_put(entity->finished);

6) Why put amdgpu_ctx_mgr_fini() beneath amdgpu_vm_fini()? Any reason for that?
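For reference on 5), dma_fence_put() already contains the NULL check inside; from include/linux/dma-fence.h:

static inline void dma_fence_put(struct dma_fence *fence)
{
        if (fence)
                kref_put(&fence->refcount, dma_fence_release);
}

And just to illustrate 1)-3) together, the callback could look roughly like this -- only an untested sketch using the suggested name, with the body taken from your patch:

static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
                                          struct dma_fence_cb *cb)
{
        struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
                                                 finish_cb);

        /* "f" is never used here, so the -ENOENT fallback can simply be:
         * drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
         */
        drm_sched_fence_finished(job->s_fence);
        WARN_ON(job->s_fence->parent);
        dma_fence_put(&job->s_fence->finished);
        job->sched->ops->free_job(job);
}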
thanks
/Monk

On 04/13/2018 10:06 AM, Deng, Emily wrote:
> Ping....
>
> Best Wishes,
> Emily Deng
>
>> -----Original Message-----
>> From: Emily Deng [mailto:Emily.Deng at amd.com]
>> Sent: Thursday, April 12, 2018 6:22 PM
>> To: amd-gfx at lists.freedesktop.org
>> Cc: Deng, Emily <Emily.Deng at amd.com>; Liu, Monk <Monk.Liu at amd.com>
>> Subject: [PATCH] drm/gpu-sched: fix force APP kill hang(v3)
>>
>> issue:
>> VMC page faults occur if the app is force-killed during a 3dmark test.
>> The cause is that in entity_fini() we manually signal all the jobs in the
>> entity's queue, which confuses the sync/dep mechanism:
>>
>> 1) a page fault occurred in sdma's clear job, which operates on the shadow
>> buffer, because the shadow buffer's GART table was cleaned by
>> ttm_bo_release() after the fence in its reservation was fake-signaled by
>> entity_fini() in the SIGKILL case.
>>
>> 2) a page fault occurred in a gfx job because, during the lifetime of the
>> gfx job, we manually fake-signal all jobs from its entity in entity_fini();
>> thus the unmapping/clear-PTE jobs that depend on those result fences are
>> satisfied, and sdma starts clearing the PTEs, leading to a GFX page fault.
>>
>> fix:
>> 1) at least wait in entity_fini() for all already-scheduled jobs to
>> complete if SIGKILL is the case.
>>
>> 2) if a signaled fence clears some entity's dependency, set that entity
>> guilty to prevent its jobs from really running, since the dependency was
>> fake-signaled.
>>
>> v2:
>> split drm_sched_entity_fini() into two functions:
>> 1) the first one does the waiting, removes the entity from the runqueue
>> and returns an error when the process was killed.
>> 2) the second one then goes over the entity, installs it as completion
>> signal for the remaining jobs and signals all jobs with an error code.
>>
>> v3:
>> 1) replace fini1 and fini2 with better names.
>> 2) call the first part before the VM teardown in
>> amdgpu_driver_postclose_kms() and the second part after the VM teardown.
>> 3) keep the original function drm_sched_entity_fini() to refine the code.
>>
>> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
>> Signed-off-by: Emily Deng <Emily.Deng at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  2 +
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 64 ++++++++++++++++++++++----
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  5 ++-
>>  drivers/gpu/drm/scheduler/gpu_scheduler.c | 74 ++++++++++++++++++++++++++-----
>>  include/drm/gpu_scheduler.h               |  7 +++
>>  5 files changed, 131 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 2babfad..200db73 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -681,6 +681,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
>>  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
>>
>>  void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
>> +void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr);
>> +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
>>  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index 09d35051..659add4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -111,8 +111,9 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>  	return r;
>>  }
>>
>> -static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>> +static void amdgpu_ctx_fini(struct kref *ref)
>>  {
>> +	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
>>  	struct amdgpu_device *adev = ctx->adev;
>>  	unsigned i, j;
>>
>> @@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>>  	kfree(ctx->fences);
>>  	ctx->fences = NULL;
>>
>> -	for (i = 0; i < adev->num_rings; i++)
>> -		drm_sched_entity_fini(&adev->rings[i]->sched,
>> -				      &ctx->rings[i].entity);
>> -
>>  	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
>>
>>  	mutex_destroy(&ctx->lock);
>> +
>> +	kfree(ctx);
>>  }
>>
>>  static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>> @@ -170,12 +169,15 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>>  static void amdgpu_ctx_do_release(struct kref *ref)
>>  {
>>  	struct amdgpu_ctx *ctx;
>> +	u32 i;
>>
>>  	ctx = container_of(ref, struct amdgpu_ctx, refcount);
>>
>> -	amdgpu_ctx_fini(ctx);
>> +	for (i = 0; i < ctx->adev->num_rings; i++)
>> +		drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
>> +			&ctx->rings[i].entity);
>>
>> -	kfree(ctx);
>> +	amdgpu_ctx_fini(ref);
>>  }
>>
>>  static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
>> @@ -435,16 +437,62 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>>  	idr_init(&mgr->ctx_handles);
>>  }
>>
>> +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>> +{
>> +	struct amdgpu_ctx *ctx;
>> +	struct idr *idp;
>> +	uint32_t id, i;
>> +
>> +	idp = &mgr->ctx_handles;
>> +
>> +	idr_for_each_entry(idp, ctx, id) {
>> +
>> +		if (!ctx->adev)
>> +			return;
>> +
>> +		for (i = 0; i < ctx->adev->num_rings; i++)
>> +			if (kref_read(&ctx->refcount) == 1)
>> +				drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
>> +					&ctx->rings[i].entity);
>> +			else
>> +				DRM_ERROR("ctx %p is still alive\n", ctx);
>> +	}
>> +}
>> +
>> +void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
>> +{
>> +	struct amdgpu_ctx *ctx;
>> +	struct idr *idp;
>> +	uint32_t id, i;
>> +
>> +	idp = &mgr->ctx_handles;
>> +
>> +	idr_for_each_entry(idp, ctx, id) {
>> +
>> +		if (!ctx->adev)
>> +			return;
>> +
>> +		for (i = 0; i < ctx->adev->num_rings; i++)
>> +			if (kref_read(&ctx->refcount) == 1)
>> +				drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
>> +					&ctx->rings[i].entity);
>> +			else
>> +				DRM_ERROR("ctx %p is still alive\n", ctx);
>> +	}
>> +}
>> +
>>  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
>>  {
>>  	struct amdgpu_ctx *ctx;
>>  	struct idr *idp;
>>  	uint32_t id;
>>
>> +	amdgpu_ctx_mgr_entity_cleanup(mgr);
>> +
>>  	idp = &mgr->ctx_handles;
>>
>>  	idr_for_each_entry(idp, ctx, id) {
>> -		if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1)
>> +		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
>>  			DRM_ERROR("ctx %p is still alive\n", ctx);
>>  	}
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 487d39e..6cbb427 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -913,8 +913,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>  		return;
>>
>>  	pm_runtime_get_sync(dev->dev);
>> -
>> -	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
>> +	amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
>>
>>  	if (adev->asic_type != CHIP_RAVEN) {
>>  		amdgpu_uvd_free_handles(adev, file_priv);
>> @@ -935,6 +934,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>  	pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
>>
>>  	amdgpu_vm_fini(adev, &fpriv->vm);
>> +	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
>> +
>>  	if (pasid)
>>  		amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
>>  	amdgpu_bo_unref(&pd);
>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> index 310275e..9062d44 100644
>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> @@ -136,6 +136,8 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
>>  	entity->rq = rq;
>>  	entity->sched = sched;
>>  	entity->guilty = guilty;
>> +	entity->fini_status = 0;
>> +	entity->finished = NULL;
>>
>>  	spin_lock_init(&entity->rq_lock);
>>  	spin_lock_init(&entity->queue_lock);
>> @@ -197,19 +199,30 @@ static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
>>  	return true;
>>  }
>>
>> +static void drm_sched_entity_fini_job_cb(struct dma_fence *f,
>> +					 struct dma_fence_cb *cb)
>> +{
>> +	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
>> +						 finish_cb);
>> +	drm_sched_fence_finished(job->s_fence);
>> +	WARN_ON(job->s_fence->parent);
>> +	dma_fence_put(&job->s_fence->finished);
>> +	job->sched->ops->free_job(job);
>> +}
>> +
>> +
>>  /**
>>   * Destroy a context entity
>>   *
>>   * @sched       Pointer to scheduler instance
>>   * @entity      The pointer to a valid scheduler entity
>>   *
>> - * Cleanup and free the allocated resources.
>> + * Splitting drm_sched_entity_fini() into two functions, the first one does the waiting,
>> + * removes the entity from the runqueue and returns an error when the process was killed.
>>   */
>> -void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
>> +void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
>>  			   struct drm_sched_entity *entity)
>>  {
>> -	int r;
>> -
>>  	if (!drm_sched_entity_is_initialized(sched, entity))
>>  		return;
>>  	/**
>> @@ -217,13 +230,28 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
>>  	 * queued IBs or discard them on SIGKILL
>>  	 */
>>  	if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
>> -		r = -ERESTARTSYS;
>> +		entity->fini_status = -ERESTARTSYS;
>>  	else
>> -		r = wait_event_killable(sched->job_scheduled,
>> +		entity->fini_status = wait_event_killable(sched->job_scheduled,
>>  				    drm_sched_entity_is_idle(entity));
>>  	drm_sched_entity_set_rq(entity, NULL);
>> -	if (r) {
>> +}
>> +EXPORT_SYMBOL(drm_sched_entity_do_release);
>>
>> +/**
>> + * Destroy a context entity
>> + *
>> + * @sched       Pointer to scheduler instance
>> + * @entity      The pointer to a valid scheduler entity
>> + *
>> + * The second one then goes over the entity and signals all jobs with an error code.
>> + */
>> +void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
>> +			      struct drm_sched_entity *entity)
>> +{
>> +	if (entity->fini_status) {
>>  		struct drm_sched_job *job;
>> +		int r;
>>
>>  		/* Park the kernel for a moment to make sure it isn't processing
>>  		 * our enity.
>> @@ -241,13 +269,28 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
>>  			struct drm_sched_fence *s_fence = job->s_fence;
>>  			drm_sched_fence_scheduled(s_fence);
>>  			dma_fence_set_error(&s_fence->finished, -ESRCH);
>> -			drm_sched_fence_finished(s_fence);
>> -			WARN_ON(s_fence->parent);
>> -			dma_fence_put(&s_fence->finished);
>> -			sched->ops->free_job(job);
>> +			r = dma_fence_add_callback(entity->finished, &job->finish_cb,
>> +						   drm_sched_entity_fini_job_cb);
>> +			if (r == -ENOENT)
>> +				drm_sched_entity_fini_job_cb(entity->finished, &job->finish_cb);
>> +			else if (r)
>> +				DRM_ERROR("fence add callback failed (%d)\n", r);
>> +		}
>> +
>> +		if (entity->finished) {
>> +			dma_fence_put(entity->finished);
>> +			entity->finished = NULL;
>>  		}
>>  	}
>>  }
>> +EXPORT_SYMBOL(drm_sched_entity_cleanup);
>> +
>> +void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
>> +			   struct drm_sched_entity *entity)
>> +{
>> +	drm_sched_entity_do_release(sched, entity);
>> +	drm_sched_entity_cleanup(sched, entity);
>> +}
>>  EXPORT_SYMBOL(drm_sched_entity_fini);
>>
>>  static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
>> @@ -530,6 +573,11 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>>  		spin_unlock(&sched->job_list_lock);
>>  		fence = sched->ops->run_job(s_job);
>>  		atomic_inc(&sched->hw_rq_count);
>> +
>> +		if (s_job->entity->finished)
>> +			dma_fence_put(s_job->entity->finished);
>> +		s_job->entity->finished = dma_fence_get(&s_fence->finished);
>> +
>>  		if (fence) {
>>  			s_fence->parent = dma_fence_get(fence);
>>  			r = dma_fence_add_callback(fence, &s_fence->cb,
>> @@ -556,6 +604,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>  		       void *owner)
>>  {
>>  	job->sched = sched;
>> +	job->entity = entity;
>>  	job->s_priority = entity->rq - sched->sched_rq;
>>  	job->s_fence = drm_sched_fence_create(entity, owner);
>>  	if (!job->s_fence)
>> @@ -668,6 +717,9 @@ static int drm_sched_main(void *param)
>>
>>  		fence = sched->ops->run_job(sched_job);
>>  		drm_sched_fence_scheduled(s_fence);
>> +		if (entity->finished)
>> +			dma_fence_put(entity->finished);
>> +		entity->finished = dma_fence_get(&s_fence->finished);
>>
>>  		if (fence) {
>>  			s_fence->parent = dma_fence_get(fence);
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index c053a32..a0dd947 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -65,6 +65,8 @@ struct drm_sched_entity {
>>  	struct dma_fence		*dependency;
>>  	struct dma_fence_cb		cb;
>>  	atomic_t			*guilty; /* points to ctx's guilty */
>> +	uint32_t			fini_status;
>> +	struct dma_fence		*finished;
>>  };
>>
>>  /**
>> @@ -119,6 +121,7 @@ struct drm_sched_job {
>>  	uint64_t			id;
>>  	atomic_t			karma;
>>  	enum drm_sched_priority		s_priority;
>> +	struct drm_sched_entity		*entity;
>>  };
>>
>>  static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
>> @@ -186,6 +189,10 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
>>  			  struct drm_sched_entity *entity,
>>  			  struct drm_sched_rq *rq,
>>  			  uint32_t jobs, atomic_t *guilty);
>> +void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
>> +				 struct drm_sched_entity *entity);
>> +void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
>> +			      struct drm_sched_entity *entity);
>>  void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
>>  			   struct drm_sched_entity *entity);
>>  void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>> --
>> 2.7.4