Am 19.09.2017 um 08:41 schrieb Monk Liu: > otherwise a gpu hang will make application couldn't be killed > under timedout=0 mode > > v2: > Fix memoryleak job/job->s_fence issue > unlock mn > remove the ERROR msg after waiting being interrupted > > Change-Id: I6051b5b3ae1188983f49325a2438c84a6c12374a > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 17 +++++++++++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 12 +++++++----- > 3 files changed, 24 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index cc9a232..6ff2959 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -736,8 +736,8 @@ struct amdgpu_ctx_mgr { > struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); > int amdgpu_ctx_put(struct amdgpu_ctx *ctx); > > -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > - struct dma_fence *fence); > +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > + struct dma_fence *fence, uint64_t *seq); > struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, > struct amdgpu_ring *ring, uint64_t seq); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index b59749d..9bd4834 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -1043,6 +1043,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; > struct amdgpu_job *job; > unsigned i; > + uint64_t seq; > + > int r; > > amdgpu_mn_lock(p->mn); > @@ -1071,8 +1073,19 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > job->owner = p->filp; > job->fence_ctx = entity->fence_context; > p->fence = dma_fence_get(&job->base.s_fence->finished); > - cs->out.handle 
= amdgpu_ctx_add_fence(p->ctx, ring, p->fence); > - job->uf_sequence = cs->out.handle; > + r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); > + if (r) { > + /* release job include the sched fence as well */ > + dma_fence_put(&job->base.s_fence->finished); > + dma_fence_put(&job->base.s_fence->scheduled); > + amdgpu_job_free(job); > + amdgpu_mn_unlock(p->mn); > + dma_fence_put(p->fence); > + return r; > + } > + > + cs->out.handle = seq; > + job->uf_sequence = seq; > amdgpu_job_free_resources(job); > > trace_amdgpu_cs_ioctl(job); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > index a11e443..551f114 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > @@ -246,8 +246,8 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) > return 0; > } > > -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > - struct dma_fence *fence) > +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > + struct dma_fence *fence, uint64_t* handler) > { > struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; > uint64_t seq = cring->sequence; > @@ -258,9 +258,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > other = cring->fences[idx]; > if (other) { > signed long r; > - r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); > + r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT); > if (r < 0) > - DRM_ERROR("Error (%ld) waiting for fence!\n", r); > + return -ERESTARTSYS; Please return the original error code here, i.e. "r" as returned by dma_fence_wait_timeout(), instead of a hard-coded -ERESTARTSYS. With that fixed, the patch is Reviewed-by: Christian König <christian.koenig at amd.com> Regards, Christian. 
> } > > dma_fence_get(fence); > @@ -271,8 +271,10 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > spin_unlock(&ctx->ring_lock); > > dma_fence_put(other); > + if (handler) > + *handler = seq; > > - return seq; > + return 0; > } > > struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,