Am 18.09.2017 um 08:11 schrieb Monk Liu: > otherwise a gpu hang will make application couldn't be killed > > Change-Id: I6051b5b3ae1188983f49325a2438c84a6c12374a > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 14 +++++++++----- > 3 files changed, 21 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index cc9a232..6ff2959 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -736,8 +736,8 @@ struct amdgpu_ctx_mgr { > struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); > int amdgpu_ctx_put(struct amdgpu_ctx *ctx); > > -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > - struct dma_fence *fence); > +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > + struct dma_fence *fence, uint64_t *seq); > struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, > struct amdgpu_ring *ring, uint64_t seq); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index b59749d..4ac7a92 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -1043,6 +1043,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; > struct amdgpu_job *job; > unsigned i; > + uint64_t seq; > + > int r; > > amdgpu_mn_lock(p->mn); > @@ -1071,8 +1073,14 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > job->owner = p->filp; > job->fence_ctx = entity->fence_context; > p->fence = dma_fence_get(&job->base.s_fence->finished); > - cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); > - job->uf_sequence = cs->out.handle; > + r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); > + if (r) { > + dma_fence_put(p->fence); > + return r; This will memory leak the job and you need to call amdgpu_mn_unlock() before returning. > + } > + > + cs->out.handle = seq; > + job->uf_sequence = seq; > amdgpu_job_free_resources(job); > > trace_amdgpu_cs_ioctl(job); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > index a11e443..97f8be4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > @@ -246,8 +246,8 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) > return 0; > } > > -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > - struct dma_fence *fence) > +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > + struct dma_fence *fence, uint64_t* handler) > { > struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; > uint64_t seq = cring->sequence; > @@ -258,9 +258,11 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > other = cring->fences[idx]; > if (other) { > signed long r; > - r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); > - if (r < 0) > + r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT); > + if (r < 0) { > DRM_ERROR("Error (%ld) waiting for fence!\n", r); Drop the extra error message here. Receiving an signal is not something that should trigger an extra message in the logs > + return -ERESTARTSYS; And return the original error code here. Apart from that looks good to me, Christian. > + } > } > > dma_fence_get(fence); > @@ -271,8 +273,10 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > spin_unlock(&ctx->ring_lock); > > dma_fence_put(other); > + if (handler) > + *handler = seq; > > - return seq; > + return 0; > } > > struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,