On 2020-02-26 3:37 p.m., Nirmoy Das wrote:
> We were changing the compute ring priority while rings were being used,
> before every job submission, which is not recommended. This patch
> instead recreates the entity with a high/normal priority sched list
> when the user changes the ctx's priority.
>
> The high/normal priority sched lists are generated from the set of
> high/normal priority compute queues. When there are no high priority hw
> queues, it falls back to software priority.
>
> Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 58 ++++++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  6 ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  4 ++
>  5 files changed, 59 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index f397ff97b4e4..8304d0c87899 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1205,7 +1205,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>  	struct drm_sched_entity *entity = p->entity;
>  	enum drm_sched_priority priority;
> -	struct amdgpu_ring *ring;
>  	struct amdgpu_bo_list_entry *e;
>  	struct amdgpu_job *job;
>  	uint64_t seq;
> @@ -1258,9 +1257,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  	priority = job->base.s_priority;
>  	drm_sched_entity_push_job(&job->base, entity);
>
> -	ring = to_amdgpu_ring(entity->rq->sched);
> -	amdgpu_ring_priority_get(ring, priority);
> -
>  	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>
>  	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 94a6c42f29ea..ea4dc57d2237 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -85,8 +85,13 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const
>  		num_scheds = 1;
>  		break;
>  	case AMDGPU_HW_IP_COMPUTE:
> -		scheds = adev->gfx.compute_sched;
> -		num_scheds = adev->gfx.num_compute_sched;
> +		if (priority <= DRM_SCHED_PRIORITY_NORMAL) {
> +			scheds = adev->gfx.compute_sched;
> +			num_scheds = adev->gfx.num_compute_sched;
> +		} else {
> +			scheds = adev->gfx.compute_sched_high;
> +			num_scheds = adev->gfx.num_compute_sched_high;
> +		}

Why more if-conditionals? If you initialize a map with DRM_SCHED_PRIORITY_MAX entries, then you can simply do:

	scheds = adev->gfx.map[priority];

So, part of your array would contain the normal schedulers and the rest the high ones.

Also, I don't think you should introduce yet another compute_sched[] array. All of this should be folded into a single array containing both normal and high.

Also consider changing to this:

enum drm_sched_priority {
	DRM_SCHED_PRIORITY_UNSET,
	DRM_SCHED_PRIORITY_INVALID,	<--- remove
	DRM_SCHED_PRIORITY_MIN,
	DRM_SCHED_PRIORITY_LOW = DRM_SCHED_PRIORITY_MIN,
	DRM_SCHED_PRIORITY_NORMAL,
	DRM_SCHED_PRIORITY_HIGH_SW,
	DRM_SCHED_PRIORITY_HIGH_HW,
	DRM_SCHED_PRIORITY_KERNEL,
	DRM_SCHED_PRIORITY_MAX,
};

We should never have an "invalid priority", just an ignored priority. :-)

Second, having 0 as UNSET gives you an easy default when you set map[0] = the normal-priority list, as memory usually comes memset to 0. IOW, you'd not need to check against UNSET, and would simply use the default [0], which you'd set to normal priority.
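
To make the map idea concrete, here is a minimal, untested sketch. The compute_prio_sched/num_compute_prio_sched names are hypothetical, not existing amdgpu fields:

	/* In struct amdgpu_gfx (hypothetical fields), one bucket of
	 * schedulers per priority level:
	 */
	struct drm_gpu_scheduler *compute_prio_sched[DRM_SCHED_PRIORITY_MAX][AMDGPU_MAX_COMPUTE_RINGS];
	uint32_t num_compute_prio_sched[DRM_SCHED_PRIORITY_MAX];

	/* amdgpu_ctx_init_entity() then needs no if-conditional, and with
	 * bucket [0] (DRM_SCHED_PRIORITY_UNSET) pre-filled with the normal
	 * list, an unset priority needs no special-casing either:
	 */
	case AMDGPU_HW_IP_COMPUTE:
		scheds = adev->gfx.compute_prio_sched[priority];
		num_scheds = adev->gfx.num_compute_prio_sched[priority];
		break;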
>  		break;
>  	case AMDGPU_HW_IP_DMA:
>  		scheds = adev->sdma.sdma_sched;
> @@ -502,6 +507,24 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
>  	return fence;
>  }
>
> +static void amdgpu_ctx_hw_priority_override(struct amdgpu_ctx *ctx,
> +					    const u32 hw_ip,
> +					    enum drm_sched_priority priority)
> +{
> +	int i;
> +
> +	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
> +		if (!ctx->entities[hw_ip][i])
> +			continue;
> +
> +		/* TODO what happens with prev scheduled jobs */
> +		drm_sched_entity_destroy(&ctx->entities[hw_ip][i]->entity);
> +		amdgpu_ctx_fini_entity(ctx->entities[hw_ip][i]);
> +
> +		amdgpu_ctx_init_entity(ctx, AMDGPU_HW_IP_COMPUTE, i);
> +
> +	}
> +}
>  void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>  				  enum drm_sched_priority priority)
>  {
> @@ -515,12 +538,18 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>  	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>  		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>  			struct drm_sched_entity *entity;
> +			struct amdgpu_ring *ring;
>
>  			if (!ctx->entities[i][j])
>  				continue;
>
>  			entity = &ctx->entities[i][j]->entity;
> -			drm_sched_entity_set_priority(entity, ctx_prio);
> +			ring = to_amdgpu_ring(entity->rq->sched);
> +
> +			if (ring->funcs->init_priority)
> +				amdgpu_ctx_hw_priority_override(ctx, i, priority);
> +			else
> +				drm_sched_entity_set_priority(entity, ctx_prio);

Why more if-conditionals? Could we not have a map?

>  		}
>  	}
>  }
> @@ -630,6 +659,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
>
>  void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
>  {
> +	enum drm_sched_priority priority;
>  	int i, j;
>
>  	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
> @@ -638,8 +668,26 @@ void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
>  	}
>
>  	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> -		adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
> -		adev->gfx.num_compute_sched++;
> +		priority = adev->gfx.compute_ring[i].priority;
> +		if (priority <= DRM_SCHED_PRIORITY_NORMAL) {
> +			adev->gfx.compute_sched[i] =
> +				&adev->gfx.compute_ring[i].sched;
> +			adev->gfx.num_compute_sched++;
> +		} else {
> +			adev->gfx.compute_sched_high[i] =
> +				&adev->gfx.compute_ring[i].sched;
> +			adev->gfx.num_compute_sched_high++;
> +		}
> +	}

I think it would be better to use a map for this, as opposed to if-conditionals.
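
Sketching amdgpu_ctx_init_sched() with the same hypothetical map fields as in the sketch above; untested, and it assumes at least one normal-priority compute ring exists:

	void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
	{
		enum drm_sched_priority prio;
		uint32_t n;
		int i;

		/* bucket each compute ring by its hardware priority */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			prio = adev->gfx.compute_ring[i].priority;
			n = adev->gfx.num_compute_prio_sched[prio]++;
			adev->gfx.compute_prio_sched[prio][n] =
				&adev->gfx.compute_ring[i].sched;
		}

		/* mirror the normal list into every empty bucket, including
		 * [0] == UNSET, so lookups never need a fallback check
		 */
		for (prio = DRM_SCHED_PRIORITY_UNSET;
		     prio < DRM_SCHED_PRIORITY_MAX; prio++) {
			if (prio == DRM_SCHED_PRIORITY_NORMAL ||
			    adev->gfx.num_compute_prio_sched[prio])
				continue;
			memcpy(adev->gfx.compute_prio_sched[prio],
			       adev->gfx.compute_prio_sched[DRM_SCHED_PRIORITY_NORMAL],
			       sizeof(adev->gfx.compute_prio_sched[prio]));
			adev->gfx.num_compute_prio_sched[prio] =
				adev->gfx.num_compute_prio_sched[DRM_SCHED_PRIORITY_NORMAL];
		}

		/* the gfx/sdma/etc. scheduler arrays are set up as before */
	}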
> +
> +	/* if there are no high prio compute queues then mirror with normal
> +	 * priority so amdgpu_ctx_init_entity() works as expected */
> +	if (!adev->gfx.num_compute_sched_high) {
> +		for (i = 0; i < adev->gfx.num_compute_sched; i++) {
> +			adev->gfx.compute_sched_high[i] =
> +				adev->gfx.compute_sched[i];
> +		}
> +		adev->gfx.num_compute_sched_high = adev->gfx.num_compute_sched;
>  	}
>
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index ca17ffb01301..d58d748e3a56 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -279,7 +279,9 @@ struct amdgpu_gfx {
>  	unsigned			num_gfx_rings;
>  	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
>  	struct drm_gpu_scheduler	*compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
> +	struct drm_gpu_scheduler	*compute_sched_high[AMDGPU_MAX_COMPUTE_RINGS];
>  	uint32_t			num_compute_sched;
> +	uint32_t			num_compute_sched_high;
>  	unsigned			num_compute_rings;
>  	struct amdgpu_irq_src		eop_irq;
>  	struct amdgpu_irq_src		priv_reg_irq;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index d42be880a236..4981e443a884 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -117,12 +117,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
>
>  static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>  {
> -	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>  	struct amdgpu_job *job = to_amdgpu_job(s_job);
>
>  	drm_sched_job_cleanup(s_job);
>
> -	amdgpu_ring_priority_put(ring, s_job->s_priority);
>  	dma_fence_put(job->fence);
>  	amdgpu_sync_free(&job->sync);
>  	amdgpu_sync_free(&job->sched_sync);
> @@ -143,7 +141,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
>  		      void *owner, struct dma_fence **f)
>  {
>  	enum drm_sched_priority priority;
> -	struct amdgpu_ring *ring;
>  	int r;
>
>  	if (!f)
> @@ -158,9 +155,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
>  	priority = job->base.s_priority;
>  	drm_sched_entity_push_job(&job->base, entity);
>
> -	ring = to_amdgpu_ring(entity->rq->sched);
> -	amdgpu_ring_priority_get(ring, priority);
> -
>  	return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 18e11b0fdc3e..4501ae7afb2e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -326,6 +326,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>
>  	ring->max_dw = max_dw;
>  	ring->priority = DRM_SCHED_PRIORITY_NORMAL;
> +	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
> +	    ring->funcs->init_priority)
> +		ring->funcs->init_priority(ring);
> +

Why not add "init_priority" to all rings and just call it here unconditionally? (A minimal sketch follows at the end of this mail.)

Regards,
Luben

>  	mutex_init(&ring->priority_mutex);
>
>  	for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
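
On the unconditional init_priority call suggested above, a minimal sketch of what I mean; the nop helper and its wiring are hypothetical:

	/* Hypothetical no-op for rings with no hardware priority levels: */
	static void amdgpu_ring_init_priority_nop(struct amdgpu_ring *ring)
	{
	}

	/* Every amdgpu_ring_funcs instance would set .init_priority, either
	 * to a real implementation (compute rings) or to the nop above, so
	 * amdgpu_ring_init() can call it unconditionally:
	 */
	ring->priority = DRM_SCHED_PRIORITY_NORMAL;
	ring->funcs->init_priority(ring);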