You added the ctx mutex to keep the job pushing in order, that's good.

Acked-by: Chunming Zhou <david1.zhou at amd.com>

BTW: after you added the ctx mutex for CS, I think the thread lock in
libdrm isn't needed anymore; we can remove it now. (A standalone sketch
of the lock ordering behind this change is appended after the quoted
patch below.)

Regards,
David Zhou

On 2017/10/07 02:20, Andrey Grodzovsky wrote:
> From: Monk Liu <Monk.Liu at amd.com>
>
> need to unreserve ttm bo before "cs_add_fence" and "entity_push_job"
> otherwise there will be deadlock between "recover_vram_from_shadow"
> and previous two routines on the ttm bo's resv lock.
>
> v2:
> Add per ctx mutex.
>
> v3:
> Relocate mutex acquisition into amdgpu_cs_parser_init and mutex release
> into amdgpu_cs_parser_fini to avoid nested locking lockup.
> Add rollback code for amdgpu_ctx_add_fence in case of error or signal
> interruption.
>
> v4:
> Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill to enable
> old fence waiting before reservation lock is acquired.
>
> Change-Id: Ia209beab5036bfc2c38cbf18324fa3efd4bab1cf
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 164 ++++++++++++++++++--------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |   4 +
>  3 files changed, 100 insertions(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 53d8df3..baa2953 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -724,6 +724,7 @@ struct amdgpu_ctx {
>          struct dma_fence **fences;
>          struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
>          bool preamble_presented;
> +        struct mutex lock;
>  };
>
>  struct amdgpu_ctx_mgr {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9f1202a..0fa1bc7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -89,6 +89,9 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>                  goto free_chunk;
>          }
>
> +
> +        mutex_lock(&p->ctx->lock);
> +
>          /* get chunks */
>          chunk_array_user = u64_to_user_ptr(cs->in.chunks);
>          if (copy_from_user(chunk_array, chunk_array_user,
> @@ -715,28 +718,21 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>  /**
>   * cs_parser_fini() - clean parser states
>   * @parser: parser structure holding parsing context.
> - * @error: error number
> - *
> - * If error is set than unvalidate buffer, otherwise just free memory
> - * used by parsing context.
>   **/
> -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> -                                  bool backoff)
> +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
>  {
>          unsigned i;
>
> -        if (error && backoff)
> -                ttm_eu_backoff_reservation(&parser->ticket,
> -                                           &parser->validated);
> -
>          for (i = 0; i < parser->num_post_dep_syncobjs; i++)
>                  drm_syncobj_put(parser->post_dep_syncobjs[i]);
>          kfree(parser->post_dep_syncobjs);
>
>          dma_fence_put(parser->fence);
>
> -        if (parser->ctx)
> +        if (parser->ctx) {
> +                mutex_unlock(&parser->ctx->lock);
>                  amdgpu_ctx_put(parser->ctx);
> +        }
>          if (parser->bo_list)
>                  amdgpu_bo_list_put(parser->bo_list);
>
> @@ -843,7 +839,72 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
>          struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>          struct amdgpu_vm *vm = &fpriv->vm;
>          struct amdgpu_ring *ring = p->job->ring;
> -        int i, r;
> +        int i, j, r;
> +
> +        for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
> +
> +                struct amdgpu_cs_chunk *chunk;
> +                struct amdgpu_ib *ib;
> +                struct drm_amdgpu_cs_chunk_ib *chunk_ib;
> +
> +                chunk = &p->chunks[i];
> +                ib = &p->job->ibs[j];
> +                chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
> +
> +                if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> +                        continue;
> +
> +                if (p->job->ring->funcs->parse_cs) {
> +                        struct amdgpu_bo_va_mapping *m;
> +                        struct amdgpu_bo *aobj = NULL;
> +                        uint64_t offset;
> +                        uint8_t *kptr;
> +
> +                        r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
> +                                                   &aobj, &m);
> +                        if (r) {
> +                                DRM_ERROR("IB va_start is invalid\n");
> +                                return r;
> +                        }
> +
> +                        if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
> +                            (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
> +                                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
> +                                return -EINVAL;
> +                        }
> +
> +                        /* the IB should be reserved at this point */
> +                        r = amdgpu_bo_kmap(aobj, (void **)&kptr);
> +                        if (r) {
> +                                return r;
> +                        }
> +
> +                        offset = m->start * AMDGPU_GPU_PAGE_SIZE;
> +                        kptr += chunk_ib->va_start - offset;
> +
> +                        r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
> +                        if (r) {
> +                                DRM_ERROR("Failed to get ib !\n");
> +                                return r;
> +                        }
> +
> +                        memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
> +                        amdgpu_bo_kunmap(aobj);
> +                } else {
> +                        r = amdgpu_ib_get(adev, vm, 0, ib);
> +                        if (r) {
> +                                DRM_ERROR("Failed to get ib !\n");
> +                                return r;
> +                        }
> +
> +                }
> +
> +                ib->gpu_addr = chunk_ib->va_start;
> +                ib->length_dw = chunk_ib->ib_bytes / 4;
> +                ib->flags = chunk_ib->flags;
> +                j++;
> +
> +        }
>
>          /* Only for UVD/VCE VM emulation */
>          if (ring->funcs->parse_cs) {
> @@ -868,19 +929,15 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
>  static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>                               struct amdgpu_cs_parser *parser)
>  {
> -        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
> -        struct amdgpu_vm *vm = &fpriv->vm;
>          int i, j;
>          int r, ce_preempt = 0, de_preempt = 0;
>
>          for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
>                  struct amdgpu_cs_chunk *chunk;
> -                struct amdgpu_ib *ib;
>                  struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>                  struct amdgpu_ring *ring;
>
>                  chunk = &parser->chunks[i];
> -                ib = &parser->job->ibs[j];
>                  chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>
>                  if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> @@ -917,54 +974,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>
>                  parser->job->ring = ring;
>
> -                if (ring->funcs->parse_cs) {
> -                        struct amdgpu_bo_va_mapping *m;
> -                        struct amdgpu_bo *aobj = NULL;
> -                        uint64_t offset;
> -                        uint8_t *kptr;
> -
> -                        r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
> -                                                   &aobj, &m);
> -                        if (r) {
> -                                DRM_ERROR("IB va_start is invalid\n");
> -                                return r;
> -                        }
> -
> -                        if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
> -                            (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
> -                                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
> -                                return -EINVAL;
> -                        }
> -
> -                        /* the IB should be reserved at this point */
> -                        r = amdgpu_bo_kmap(aobj, (void **)&kptr);
> -                        if (r) {
> -                                return r;
> -                        }
> -
> -                        offset = m->start * AMDGPU_GPU_PAGE_SIZE;
> -                        kptr += chunk_ib->va_start - offset;
> -
> -                        r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
> -                        if (r) {
> -                                DRM_ERROR("Failed to get ib !\n");
> -                                return r;
> -                        }
> -
> -                        memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
> -                        amdgpu_bo_kunmap(aobj);
> -                } else {
> -                        r = amdgpu_ib_get(adev, vm, 0, ib);
> -                        if (r) {
> -                                DRM_ERROR("Failed to get ib !\n");
> -                                return r;
> -                        }
> -
> -                }
> -
> -                ib->gpu_addr = chunk_ib->va_start;
> -                ib->length_dw = chunk_ib->ib_bytes / 4;
> -                ib->flags = chunk_ib->flags;
>                  j++;
>          }
>
> @@ -1160,14 +1169,26 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>
>          amdgpu_cs_post_dependencies(p);
>
> +
> +        /* hook sched fence to all BOs' reservation in validated list
> +         * and unreserve them.
> +         *
> +         * we unreserve at here is because otherwise
> +         * there'll be deadlock between ctx_add_fence/sched_entity_push_job
> +         * and gpu_reset routine's recover_bo_from_shadow on PD/PTEs' ttm bo lock
> +         */
> +        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> +
> +
>          cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
> +
> +
>          job->uf_sequence = cs->out.handle;
>          amdgpu_job_free_resources(job);
>
>          trace_amdgpu_cs_ioctl(job);
>          amd_sched_entity_push_job(&job->base);
>
> -        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>          amdgpu_mn_unlock(p->mn);
>
>          return 0;
> @@ -1189,6 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>
>          parser.adev = adev;
>          parser.filp = filp;
> +        fpriv = filp->driver_priv;
>
>          r = amdgpu_cs_parser_init(&parser, data);
>          if (r) {
> @@ -1196,6 +1218,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>                  goto out;
>          }
>
> +        r = amdgpu_cs_ib_fill(adev, &parser);
> +        if (r)
> +                goto out;
> +
>          r = amdgpu_cs_parser_bos(&parser, data);
>          if (r) {
>                  if (r == -ENOMEM)
> @@ -1206,9 +1232,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>          }
>
>          reserved_buffers = true;
> -        r = amdgpu_cs_ib_fill(adev, &parser);
> -        if (r)
> -                goto out;
>
>          r = amdgpu_cs_dependencies(adev, &parser);
>          if (r) {
> @@ -1226,7 +1249,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>          r = amdgpu_cs_submit(&parser, cs);
>
>  out:
> -        amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
> +        if (r && reserved_buffers)
> +                ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
> +
> +        amdgpu_cs_parser_fini(&parser);
>          return r;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a11e443..c073a68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -39,6 +39,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
>          if (!ctx->fences)
>                  return -ENOMEM;
>
> +        mutex_init(&ctx->lock);
> +
>          for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>                  ctx->rings[i].sequence = 1;
>                  ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
> @@ -96,6 +98,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>                          &ctx->rings[i].entity);
>
>          amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
> +
> +        mutex_destroy(&ctx->lock);
>  }
>
>  static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
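
For anyone joining the thread late: the deadlock described in the commit
message is a classic AB-BA ordering around the BO reservation. Below is a
minimal standalone sketch of that pattern. It is not code from amdgpu or
libdrm; the two pthread mutexes merely stand in for the ttm bo's resv lock
and for whatever ctx_add_fence/push_job can end up waiting on while a GPU
reset is in progress, and all names are made up for illustration.

/*
 * Standalone illustration of the AB-BA ordering, NOT amdgpu code.
 * "bo_resv" stands in for the ttm bo's reservation lock, "reset_side"
 * for the work the reset path serializes the submit path against.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bo_resv = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t reset_side = PTHREAD_MUTEX_INITIALIZER;

/* v1 submit path: still holds the reservation while pushing the job. */
static void *submit_thread(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&bo_resv);           /* ttm_eu_reserve_buffers() */
        pthread_mutex_lock(&reset_side);        /* add_fence/push_job stuck behind reset */
        pthread_mutex_unlock(&reset_side);
        pthread_mutex_unlock(&bo_resv);         /* v1 only unreserved here */
        return NULL;
}

/* reset path: takes the same two locks in the opposite order. */
static void *reset_thread(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&reset_side);
        pthread_mutex_lock(&bo_resv);           /* recover_vram_from_shadow() needs the resv */
        pthread_mutex_unlock(&bo_resv);
        pthread_mutex_unlock(&reset_side);
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, submit_thread, NULL);
        pthread_create(&b, NULL, reset_thread, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("finished; with the v1 ordering the two threads can deadlock\n");
        return 0;
}

The patch breaks that cycle by calling ttm_eu_fence_buffer_objects(), which
drops the reservation, before amdgpu_ctx_add_fence() and
amd_sched_entity_push_job(), so the submit side no longer holds the resv
while it waits.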