From: Monk Liu <Monk.Liu@xxxxxxx>

Need to unreserve the ttm bo before "cs_add_fence" and "entity_push_job",
otherwise there will be a deadlock between "recover_vram_from_shadow" and
the previous two routines on the ttm bo's resv lock.

v2: Add a per-ctx mutex.

v3: Relocate the mutex acquisition into amdgpu_cs_parser_init and the
mutex release into amdgpu_cs_parser_fini to avoid a nested locking
lockup. Add rollback code for amdgpu_ctx_add_fence in case of error or
signal interruption.

v4: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill to enable old
fence waiting before the reservation lock is acquired.

Change-Id: Ia209beab5036bfc2c38cbf18324fa3efd4bab1cf
Signed-off-by: Monk Liu <Monk.Liu at amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 164 ++++++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |   4 +
 3 files changed, 100 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 53d8df3..baa2953 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -724,6 +724,7 @@ struct amdgpu_ctx {
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 	bool			preamble_presented;
+	struct mutex		lock;
 };
 
 struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9f1202a..0fa1bc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -89,6 +89,9 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_chunk;
 	}
 
+
+	mutex_lock(&p->ctx->lock);
+
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
@@ -715,28 +718,21 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 /**
  * cs_parser_fini() - clean parser states
  * @parser:	parser structure holding parsing context.
- * @error:	error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
  **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
-				  bool backoff)
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
 {
 	unsigned i;
 
-	if (error && backoff)
-		ttm_eu_backoff_reservation(&parser->ticket,
-					   &parser->validated);
-
 	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
 		drm_syncobj_put(parser->post_dep_syncobjs[i]);
 	kfree(parser->post_dep_syncobjs);
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
 
@@ -843,7 +839,72 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_ring *ring = p->job->ring;
-	int i, r;
+	int i, j, r;
+
+	for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+
+		struct amdgpu_cs_chunk *chunk;
+		struct amdgpu_ib *ib;
+		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+
+		chunk = &p->chunks[i];
+		ib = &p->job->ibs[j];
+		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
+
+		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+			continue;
+
+		if (p->job->ring->funcs->parse_cs) {
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			uint64_t offset;
+			uint8_t *kptr;
+
+			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
+						   &aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
+
+			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
+			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += chunk_ib->va_start - offset;
+
+			r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
+			if (r) {
+				DRM_ERROR("Failed to get ib !\n");
+				return r;
+			}
+
+			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+			amdgpu_bo_kunmap(aobj);
+		} else {
+			r = amdgpu_ib_get(adev, vm, 0, ib);
+			if (r) {
+				DRM_ERROR("Failed to get ib !\n");
+				return r;
+			}
+
+		}
+
+		ib->gpu_addr = chunk_ib->va_start;
+		ib->length_dw = chunk_ib->ib_bytes / 4;
+		ib->flags = chunk_ib->flags;
+		j++;
+
+	}
 
 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
@@ -868,19 +929,15 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			     struct amdgpu_cs_parser *parser)
 {
-	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
 	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
-		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 		struct amdgpu_ring *ring;
 
 		chunk = &parser->chunks[i];
-		ib = &parser->job->ibs[j];
 		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
@@ -917,54 +974,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
 		parser->job->ring = ring;
 
-		if (ring->funcs->parse_cs) {
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			uint64_t offset;
-			uint8_t *kptr;
-
-			r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
-						   &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += chunk_ib->va_start - offset;
-
-			r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
-			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-			amdgpu_bo_kunmap(aobj);
-		} else {
-			r = amdgpu_ib_get(adev, vm, 0, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
-		}
-
-		ib->gpu_addr = chunk_ib->va_start;
-		ib->length_dw = chunk_ib->ib_bytes / 4;
-		ib->flags = chunk_ib->flags;
 		j++;
 	}
 
@@ -1160,14 +1169,26 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	amdgpu_cs_post_dependencies(p);
 
+
+	/* hook the sched fence to all BOs' reservations in the validated
+	 * list and unreserve them.
+	 *
+	 * we unreserve here because otherwise there would be a deadlock
+	 * between ctx_add_fence/sched_entity_push_job and the gpu_reset
+	 * routine's recover_bo_from_shadow on the PD/PTEs' ttm bo lock
+	 */
+	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
+
+	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
+
+
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 
-	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
 	return 0;
@@ -1189,6 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	parser.adev = adev;
 	parser.filp = filp;
+	fpriv = filp->driver_priv;
 
 	r = amdgpu_cs_parser_init(&parser, data);
 	if (r) {
@@ -1196,6 +1218,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		goto out;
 	}
 
+	r = amdgpu_cs_ib_fill(adev, &parser);
+	if (r)
+		goto out;
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1206,9 +1232,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	}
 
 	reserved_buffers = true;
-	r = amdgpu_cs_ib_fill(adev, &parser);
-	if (r)
-		goto out;
 
 	r = amdgpu_cs_dependencies(adev, &parser);
 	if (r) {
@@ -1226,7 +1249,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		r = amdgpu_cs_submit(&parser, cs);
 
 out:
-	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+	if (r && reserved_buffers)
+		ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
+
+	amdgpu_cs_parser_fini(&parser);
 
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a11e443..c073a68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -39,6 +39,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 	if (!ctx->fences)
 		return -ENOMEM;
 
+	mutex_init(&ctx->lock);
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
 		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
@@ -96,6 +98,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 				      &ctx->rings[i].entity);
 
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+	mutex_destroy(&ctx->lock);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
-- 
2.7.4
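
[Editor's note] For readers following the locking argument in the commit
message, the deadlock and its fix reduce to lock ordering, which the
small user-space model below makes concrete. This is only an illustrative
sketch under stated assumptions, not driver code: the pthread mutexes
bo_resv and reset_lock stand in for a BO's ttm reservation and for
whatever the gpu_reset/recovery path holds while it re-reserves BOs, and
every name here (bo_resv, reset_lock, submit_thread, reset_thread) is
invented for the example. Build with "gcc -pthread model.c".

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bo_resv    = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t reset_lock = PTHREAD_MUTEX_INITIALIZER;

static void *submit_thread(void *arg)
{
	(void)arg;
	/* validate BOs and attach the scheduler fence under the
	 * reservation, then unreserve BEFORE queuing the job --
	 * mirroring ttm_eu_fence_buffer_objects() in amdgpu_cs_submit() */
	pthread_mutex_lock(&bo_resv);
	pthread_mutex_unlock(&bo_resv);

	/* only now block on submission-side locks, as
	 * amdgpu_ctx_add_fence()/amd_sched_entity_push_job() may do */
	pthread_mutex_lock(&reset_lock);
	pthread_mutex_unlock(&reset_lock);
	return NULL;
}

static void *reset_thread(void *arg)
{
	(void)arg;
	/* the recovery path holds its own lock and then needs each BO's
	 * reservation, as recover_vram_from_shadow() does for PD/PT BOs */
	pthread_mutex_lock(&reset_lock);
	pthread_mutex_lock(&bo_resv);
	pthread_mutex_unlock(&bo_resv);
	pthread_mutex_unlock(&reset_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, submit_thread, NULL);
	pthread_create(&b, NULL, reset_thread, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	puts("no deadlock: the locks are never held in opposite orders");
	return 0;
}

With the pre-patch ordering, submit_thread would take reset_lock while
still holding bo_resv, the classic AB-BA inversion against reset_thread.
Unreserving first, as the patch does by moving
ttm_eu_fence_buffer_objects() ahead of amdgpu_ctx_add_fence() and
amd_sched_entity_push_job(), means neither side ever waits for one lock
while holding the other.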