On Tue, Jan 29, 2019 at 11:33 AM Christian König
<ckoenig.leichtzumerken@xxxxxxxxx> wrote:
>
> On 29.01.19 at 11:20, Bas Nieuwenhuizen wrote:
> > If we have some disabled HW blocks (say VCN), then the rings are
> > not initialized. This results in entities that refer to uninitialized
> > rqs (run queues).
> >
> > In normal usage this does not cause problems because userspace
> > generally knows to ignore the unsupported blocks, but e.g. setting
> > the priorities on all the entities resulted in a NULL pointer
> > dereference while locking the rq spinlock.
> >
> > This could probably also be triggered by actually adding a job for
> > these blocks with less careful userspace.
>
> In general I agree that we need to improve the handling here. But this
> looks completely incorrect to me.

In what sense is this incorrect? I'm fencing off all access to entities
that use an uninitialized ring (and therefore rq), and I do not
initialize/deinitialize them.

>
> We should always initialize all entities, but only with rqs that are
> initialized as well.
>
> When this results in zero rqs, we can handle that gracefully later on.
>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Bas Nieuwenhuizen <bas@xxxxxxxxxxxxxxxxxxx>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 31 +++++++++++++++++++------
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
> >  2 files changed, 25 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > index d85184b5b35c..6f72ce785b32 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > @@ -169,9 +169,13 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> >  		for (j = 0; j < num_rings; ++j)
> >  			rqs[j] = &rings[j]->sched.sched_rq[priority];
> >
> > -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> > -			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> > -						  rqs, num_rings, &ctx->guilty);
> > +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> > +			ctx->entities[i][j].enabled = rings[0]->adev != NULL;
> > +			if (ctx->entities[i][j].enabled) {
> > +				r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> > +							  rqs, num_rings, &ctx->guilty);
> > +			}
> > +		}
> >  		if (r)
> >  			goto error_cleanup_entities;
> >  	}
> > @@ -180,7 +184,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> >
> >  error_cleanup_entities:
> >  	for (i = 0; i < num_entities; ++i)
> > -		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> > +		if (ctx->entities[0][i].enabled)
> > +			drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> >  	kfree(ctx->entities[0]);
> >
> >  error_free_fences:
> > @@ -229,6 +234,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> >  		return -EINVAL;
> >  	}
> >
> > +	if (!ctx->entities[hw_ip][ring].enabled) {
> > +		DRM_DEBUG("disabled ring: %d %d\n", hw_ip, ring);
> > +		return -EINVAL;
> > +	}
> > +
> >  	*entity = &ctx->entities[hw_ip][ring].entity;
> >  	return 0;
> >  }
> > @@ -279,7 +289,8 @@ static void amdgpu_ctx_do_release(struct kref *ref)
> >  		num_entities += amdgpu_ctx_num_entities[i];
> >
> >  	for (i = 0; i < num_entities; i++)
> > -		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> > +		if (ctx->entities[0][i].enabled)
> > +			drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> >
> >  	amdgpu_ctx_fini(ref);
> >  }
> > @@ -505,7 +516,9 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> >  	for (i = 0; i < num_entities; i++) {
> >  		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
> >
> > -		drm_sched_entity_set_priority(entity, ctx_prio);
> > +
> > +		if (ctx->entities[0][i].enabled)
> > +			drm_sched_entity_set_priority(entity, ctx_prio);
> >  	}
> >  }
> >
> > @@ -557,6 +570,9 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
> >  		for (i = 0; i < num_entities; i++) {
> >  			struct drm_sched_entity *entity;
> >
> > +			if (!ctx->entities[0][i].enabled)
> > +				continue;
> > +
> >  			entity = &ctx->entities[0][i].entity;
> >  			max_wait = drm_sched_entity_flush(entity, max_wait);
> >  		}
> > @@ -584,7 +600,8 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> >  		}
> >
> >  		for (i = 0; i < num_entities; i++)
> > -			drm_sched_entity_fini(&ctx->entities[0][i].entity);
> > +			if (ctx->entities[0][i].enabled)
> > +				drm_sched_entity_fini(&ctx->entities[0][i].entity);
> >  	}
> >  }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > index b3b012c0a7da..183a783aedd8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > @@ -33,6 +33,7 @@ struct amdgpu_ctx_entity {
> >  	uint64_t		sequence;
> >  	struct dma_fence	**fences;
> >  	struct drm_sched_entity	entity;
> > +	bool			enabled;
> >  };
> >
> >  struct amdgpu_ctx {
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
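
For reference, a minimal sketch of the alternative Christian describes, assuming the loop structure of the quoted amdgpu_ctx_init() hunk and reusing its rings[j]->adev test as the "ring was initialized" condition; the num_rqs counter is a new local introduced here only for illustration:

	unsigned num_rqs = 0;

	/* Only collect run queues from rings that were actually
	 * initialized; rings of disabled HW blocks are skipped. */
	for (j = 0; j < num_rings; ++j) {
		if (!rings[j]->adev)
			continue;
		rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
	}

	/* Every entity is still initialized, possibly with zero rqs,
	 * which the scheduler side would then need to handle
	 * gracefully, as Christian notes. */
	for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
		r = drm_sched_entity_init(&ctx->entities[i][j].entity,
					  rqs, num_rqs, &ctx->guilty);
	if (r)
		goto error_cleanup_entities;

With that shape no per-entity enabled flag is needed: the destroy/flush/fini paths can treat all entities uniformly, and amdgpu_ctx_get_entity() could reject submissions to a disabled block based on the entity having ended up with zero run queues rather than on a separate bool.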