[PATCH] drm/gpu-sched: fix force APP kill hang(v2)

ckoenig.leichtzumerken@xxxxxxxxx (Christian König) · Wed, 4 Apr 2018 12:01:32 +0200

Am 04.04.2018 um 11:37 schrieb Emily Deng:
> issue:
> there are VMC page fault occured if force APP kill during
> 3dmark test, the cause is in entity_fini we manually signal
> all those jobs in entity's queue which confuse the sync/dep
> mechanism:
>
> 1)page fault occured in sdma's clear job which operate on
> shadow buffer, and shadow buffer's Gart table is cleaned by
> ttm_bo_release since the fence in its reservation was fake signaled
> by entity_fini() under the case of SIGKILL received.
>
> 2)page fault occured in gfx' job because during the lifetime
> of gfx job we manually fake signal all jobs from its entity
> in entity_fini(), thus the unmapping/clear PTE job depend on those
> result fence is satisfied and sdma start clearing the PTE and lead
> to GFX page fault.
>
> fix:
> 1)should at least wait all jobs already scheduled complete in entity_fini()
> if SIGKILL is the case.
>
> 2)if a fence signaled and try to clear some entity's dependency, should
> set this entity guilty to prevent its job really run since the dependency
> is fake signaled.
>
> v2:
> splitting drm_sched_entity_fini() into two functions:
> 1)The first one is does the waiting, removes the entity from the
> runqueue and returns an error when the process was killed.
> 2)The second one then goes over the entity, install it as
> completion signal for the remaining jobs and signals all jobs
> with an error code.

First of all this patch won't work like this. You need to call the first 
part before the VM teardown in amdgpu_driver_postclose_kms() and the 
second part after the VM teardown.

Then please come up with a better name than fini1 and fini2, something 
like drm_sched_entity_cleanup() or something like this.

Then I think it is harmless to call the _cleanup() function from the 
_fini() function and only change the places where it makes sense to 
explicitely call _cleanup().

E.g. add a new function amdgpu_ctx_mgr_cleanup() which calls the 
cleanup() for each still open ctx and then modify the order in 
amdgpu_driver_postclose_kms().

Regards,
Christian.

>
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> Signed-off-by: Emily Deng <Emily.Deng at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 13 +++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  4 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   |  3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c   |  3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  8 +++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  6 ++-
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c     |  3 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c     |  3 +-
>   drivers/gpu/drm/scheduler/gpu_scheduler.c | 67 +++++++++++++++++++++++++------
>   include/drm/gpu_scheduler.h               |  7 +++-
>   10 files changed, 93 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 09d35051..69dadf9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -104,8 +104,13 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>   
>   failed:
>   	for (j = 0; j < i; j++)
> -		drm_sched_entity_fini(&adev->rings[j]->sched,
> +		drm_sched_entity_fini1(&adev->rings[j]->sched,
>   				      &ctx->rings[j].entity);
> +
> +	for (j = 0; j < i; j++)
> +		drm_sched_entity_fini2(&adev->rings[j]->sched,
> +				      &ctx->rings[j].entity);
> +
>   	kfree(ctx->fences);
>   	ctx->fences = NULL;
>   	return r;
> @@ -126,7 +131,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>   	ctx->fences = NULL;
>   
>   	for (i = 0; i < adev->num_rings; i++)
> -		drm_sched_entity_fini(&adev->rings[i]->sched,
> +		drm_sched_entity_fini1(&adev->rings[i]->sched,
> +				      &ctx->rings[i].entity);
> +
> +	for (i = 0; i < adev->num_rings; i++)
> +		drm_sched_entity_fini2(&adev->rings[i]->sched,
>   				      &ctx->rings[i].entity);
>   
>   	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index d2ab404..83d46bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -131,8 +131,10 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
>   static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
>   {
>   	if (adev->mman.mem_global_referenced) {
> -		drm_sched_entity_fini(adev->mman.entity.sched,
> +		drm_sched_entity_fini1(adev->mman.entity.sched,
>   				      &adev->mman.entity);
> +		drm_sched_entity_fini2(adev->mman.entity.sched,
> +                      &adev->mman.entity);
>   		mutex_destroy(&adev->mman.gtt_window_lock);
>   		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
>   		drm_global_item_unref(&adev->mman.mem_global_ref);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 627542b..74a6953 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -277,7 +277,8 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>   	int i;
>   	kfree(adev->uvd.saved_bo);
>   
> -	drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
> +	drm_sched_entity_fini1(&adev->uvd.ring.sched, &adev->uvd.entity);
> +	drm_sched_entity_fini2(&adev->uvd.ring.sched, &adev->uvd.entity);
>   
>   	amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
>   			      &adev->uvd.gpu_addr,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index a33804b..4166d26 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -212,7 +212,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
>   	if (adev->vce.vcpu_bo == NULL)
>   		return 0;
>   
> -	drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
> +	drm_sched_entity_fini1(&adev->vce.ring[0].sched, &adev->vce.entity);
> +	drm_sched_entity_fini2(&adev->vce.ring[0].sched, &adev->vce.entity);
>   
>   	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
>   		(void **)&adev->vce.cpu_addr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index 58e4953..268c0c3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -129,9 +129,13 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
>   
>   	kfree(adev->vcn.saved_bo);
>   
> -	drm_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
> +	drm_sched_entity_fini1(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
>   
> -	drm_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
> +	drm_sched_entity_fini1(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
> +
> +	drm_sched_entity_fini2(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
> +
> +	drm_sched_entity_fini2(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
>   
>   	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
>   			      &adev->vcn.gpu_addr,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 2447429..084a39d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2456,7 +2456,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	vm->root.base.bo = NULL;
>   
>   error_free_sched_entity:
> -	drm_sched_entity_fini(&ring->sched, &vm->entity);
> +	drm_sched_entity_fini1(&ring->sched, &vm->entity);
> +	drm_sched_entity_fini2(&ring->sched, &vm->entity);
>   
>   	return r;
>   }
> @@ -2520,7 +2521,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>   	}
>   
> -	drm_sched_entity_fini(vm->entity.sched, &vm->entity);
> +	drm_sched_entity_fini1(vm->entity.sched, &vm->entity);
> +	drm_sched_entity_fini2(vm->entity.sched, &vm->entity);
>   
>   	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
>   		dev_err(adev->dev, "still active bo inside vm\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index f26f515..3c57a6e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -469,7 +469,8 @@ static int uvd_v6_0_sw_fini(void *handle)
>   		return r;
>   
>   	if (uvd_v6_0_enc_support(adev)) {
> -		drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
> +		drm_sched_entity_fini1(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
> +		drm_sched_entity_fini2(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
>   
>   		for (i = 0; i < adev->uvd.num_enc_rings; ++i)
>   			amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index 280c082..a422acf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -472,7 +472,8 @@ static int uvd_v7_0_sw_fini(void *handle)
>   	if (r)
>   		return r;
>   
> -	drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
> +	drm_sched_entity_fini1(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
> +	drm_sched_entity_fini2(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
>   
>   	for (i = 0; i < adev->uvd.num_enc_rings; ++i)
>   		amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 310275e..abeb3ff 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -136,6 +136,8 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
>   	entity->rq = rq;
>   	entity->sched = sched;
>   	entity->guilty = guilty;
> +	entity->fini_status = 0;
> +	entity->finished = NULL;
>   
>   	spin_lock_init(&entity->rq_lock);
>   	spin_lock_init(&entity->queue_lock);
> @@ -197,19 +199,29 @@ static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
>   	return true;
>   }
>   
> +static void drm_sched_entity_fini_job_cb(struct dma_fence *f,
> +				    struct dma_fence_cb *cb)
> +{
> +	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
> +						 finish_cb);
> +	drm_sched_fence_finished(job->s_fence);
> +	WARN_ON(job->s_fence->parent);
> +	dma_fence_put(&job->s_fence->finished);
> +	job->sched->ops->free_job(job);
> +}
> +
>   /**
>    * Destroy a context entity
>    *
>    * @sched       Pointer to scheduler instance
>    * @entity	The pointer to a valid scheduler entity
>    *
> - * Cleanup and free the allocated resources.
> + * Splitting drm_sched_entity_fini() into two functions, The first one is does the waiting,
> + * removes the entity from the runqueue and returns an error when the process was killed.
>    */
> -void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
> +void drm_sched_entity_fini1(struct drm_gpu_scheduler *sched,
>   			   struct drm_sched_entity *entity)
>   {
> -	int r;
> -
>   	if (!drm_sched_entity_is_initialized(sched, entity))
>   		return;
>   	/**
> @@ -217,13 +229,27 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
>   	 * queued IBs or discard them on SIGKILL
>   	*/
>   	if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
> -		r = -ERESTARTSYS;
> +		entity->fini_status = -ERESTARTSYS;
>   	else
> -		r = wait_event_killable(sched->job_scheduled,
> +		entity->fini_status = wait_event_killable(sched->job_scheduled,
>   					drm_sched_entity_is_idle(entity));
>   	drm_sched_entity_set_rq(entity, NULL);
> -	if (r) {
> +}
> +
> +/**
> + * Destroy a context entity
> + *
> + * @sched       Pointer to scheduler instance
> + * @entity	The pointer to a valid scheduler entity
> + *
> + * The second one then goes over the entity and signals all jobs with an error code.
> + */
> +void drm_sched_entity_fini2(struct drm_gpu_scheduler *sched,
> +			   struct drm_sched_entity *entity)
> +{
> +	if (entity->fini_status) {
>   		struct drm_sched_job *job;
> +		int r;
>   
>   		/* Park the kernel for a moment to make sure it isn't processing
>   		 * our enity.
> @@ -241,14 +267,21 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
>   			struct drm_sched_fence *s_fence = job->s_fence;
>   			drm_sched_fence_scheduled(s_fence);
>   			dma_fence_set_error(&s_fence->finished, -ESRCH);
> -			drm_sched_fence_finished(s_fence);
> -			WARN_ON(s_fence->parent);
> -			dma_fence_put(&s_fence->finished);
> -			sched->ops->free_job(job);
> +            r = dma_fence_add_callback(entity->finished, &job->finish_cb,
> +                               drm_sched_entity_fini_job_cb);
> +            if (r == -ENOENT)
> +                drm_sched_entity_fini_job_cb(entity->finished, &job->finish_cb);
> +            else if (r)
> +                DRM_ERROR("fence add callback failed (%d)\n", r);
> +        }
> +        if(entity->finished){
> +            dma_fence_put(entity->finished);
> +		    entity->finished = NULL;
>   		}
>   	}
>   }
> -EXPORT_SYMBOL(drm_sched_entity_fini);
> +EXPORT_SYMBOL(drm_sched_entity_fini1);
> +EXPORT_SYMBOL(drm_sched_entity_fini2);
>   
>   static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
>   {
> @@ -530,6 +563,11 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>   		spin_unlock(&sched->job_list_lock);
>   		fence = sched->ops->run_job(s_job);
>   		atomic_inc(&sched->hw_rq_count);
> +
> +        if(s_job->entity->finished)
> +			dma_fence_put(s_job->entity->finished);
> +		s_job->entity->finished = dma_fence_get(&s_fence->finished);
> +
>   		if (fence) {
>   			s_fence->parent = dma_fence_get(fence);
>   			r = dma_fence_add_callback(fence, &s_fence->cb,
> @@ -556,6 +594,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
>   		       void *owner)
>   {
>   	job->sched = sched;
> +	job->entity = entity;
>   	job->s_priority = entity->rq - sched->sched_rq;
>   	job->s_fence = drm_sched_fence_create(entity, owner);
>   	if (!job->s_fence)
> @@ -669,6 +708,10 @@ static int drm_sched_main(void *param)
>   		fence = sched->ops->run_job(sched_job);
>   		drm_sched_fence_scheduled(s_fence);
>   
> +		if(entity->finished)
> +			dma_fence_put(entity->finished);
> +		entity->finished = dma_fence_get(&s_fence->finished);
> +
>   		if (fence) {
>   			s_fence->parent = dma_fence_get(fence);
>   			r = dma_fence_add_callback(fence, &s_fence->cb,
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index dfd54fb..e0a5a53 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -63,6 +63,8 @@ struct drm_sched_entity {
>   	struct dma_fence		*dependency;
>   	struct dma_fence_cb		cb;
>   	atomic_t			*guilty; /* points to ctx's guilty */
> +	uint32_t            fini_status;
> +	struct dma_fence    *finished;
>   };
>   
>   /**
> @@ -99,6 +101,7 @@ struct drm_sched_job {
>   	uint64_t			id;
>   	atomic_t			karma;
>   	enum drm_sched_priority		s_priority;
> +	struct drm_sched_entity  *entity;
>   };
>   
>   static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
> @@ -148,7 +151,9 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
>   			  struct drm_sched_entity *entity,
>   			  struct drm_sched_rq *rq,
>   			  uint32_t jobs, atomic_t *guilty);
> -void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
> +void drm_sched_entity_fini1(struct drm_gpu_scheduler *sched,
> +			   struct drm_sched_entity *entity);
> +void drm_sched_entity_fini2(struct drm_gpu_scheduler *sched,
>   			   struct drm_sched_entity *entity);
>   void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>   			       struct drm_sched_entity *entity);