Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx>

On Tue, Nov 15, 2022 at 4:44 AM Christian König <christian.koenig@xxxxxxx> wrote:
>
> On 15.11.22 at 10:42, Christian König wrote:
> > It turned out that the gang leader is not the last IB specified,
> > but instead the last job allocated.
> >
> > This is a bit unfortunate and not very intuitive for the CS
> > interface, so try to fix this.
>
> Alex, could you take a look at this? I would really like to get this into
> the next -rc.
>
> Thanks,
> Christian.
>
> >
> > Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 ++++++++++++++++-------
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h |  1 +
> >  2 files changed, 17 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index 1bbd39b3b0fc..fbdf139cf497 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
> >                  return r;
> >
> >          ++(num_ibs[r]);
> > +        p->gang_leader_idx = r;
> >          return 0;
> >  }
> >
> > @@ -300,7 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
> >                  if (ret)
> >                          goto free_all_kdata;
> >          }
> > -        p->gang_leader = p->jobs[p->gang_size - 1];
> > +        p->gang_leader = p->jobs[p->gang_leader_idx];
> >
> >          if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
> >                  ret = -ECANCELED;
> > @@ -1194,16 +1195,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
> >                          return r;
> >          }
> >
> > -        for (i = 0; i < p->gang_size - 1; ++i) {
> > +        for (i = 0; i < p->gang_size; ++i) {
> > +                if (p->jobs[i] == leader)
> > +                        continue;
> > +
> >                  r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
> >                  if (r)
> >                          return r;
> >          }
> >
> > -        r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
> > +        r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
> >          if (r && r != -ERESTARTSYS)
> >                  DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
> > -
> >          return r;
> >  }
> >
> > @@ -1237,9 +1240,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >          for (i = 0; i < p->gang_size; ++i)
> >                  drm_sched_job_arm(&p->jobs[i]->base);
> >
> > -        for (i = 0; i < (p->gang_size - 1); ++i) {
> > +        for (i = 0; i < p->gang_size; ++i) {
> >                  struct dma_fence *fence;
> >
> > +                if (p->jobs[i] == leader)
> > +                        continue;
> > +
> >                  fence = &p->jobs[i]->base.s_fence->scheduled;
> >                  r = amdgpu_sync_fence(&leader->sync, fence);
> >                  if (r)
> > @@ -1275,7 +1281,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >          list_for_each_entry(e, &p->validated, tv.head) {
> >
> >                  /* Everybody except for the gang leader uses READ */
> > -                for (i = 0; i < (p->gang_size - 1); ++i) {
> > +                for (i = 0; i < p->gang_size; ++i) {
> > +                        if (p->jobs[i] == leader)
> > +                                continue;
> > +
> >                          dma_resv_add_fence(e->tv.bo->base.resv,
> >                                             &p->jobs[i]->base.s_fence->finished,
> >                                             DMA_RESV_USAGE_READ);
> > @@ -1285,7 +1294,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >                  e->tv.num_shared = 0;
> >          }
> >
> > -        seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
> > +        seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
> >                                     p->fence);
> >          amdgpu_cs_post_dependencies(p);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
> > index cbaa19b2b8a3..f80adf9069ec 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
> > @@ -54,6 +54,7 @@ struct amdgpu_cs_parser {
> >
> >          /* scheduler job objects */
> >          unsigned int gang_size;
> > +        unsigned int gang_leader_idx;
> >          struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
> >          struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE];
> >          struct amdgpu_job *gang_leader;
>
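
For readers outside the amdgpu CS code, the pattern the patch adopts is: remember the
index of the job that owns the last IB specified (gang_leader_idx), then walk the whole
gang and skip the leader by pointer comparison instead of assuming it sits in the last
array slot. Below is a minimal, self-contained sketch of that pattern; the struct and
the job names are made up for illustration and are not the kernel's actual types.

    #include <stdio.h>

    struct job { const char *name; };

    int main(void)
    {
            /* Hypothetical gang of three jobs; the leader is *not* the last slot. */
            struct job jobs[] = { { "gfx" }, { "compute" }, { "sdma" } };
            unsigned int gang_size = 3;
            unsigned int gang_leader_idx = 1;       /* job of the last IB specified */
            struct job *leader = &jobs[gang_leader_idx];
            unsigned int i;

            /* Iterate over the whole gang and skip the leader by comparison,
             * the same way the patched loops do, rather than stopping at
             * gang_size - 1 and treating the last slot as the leader. */
            for (i = 0; i < gang_size; ++i) {
                    if (&jobs[i] == leader)
                            continue;
                    printf("follower job: %s\n", jobs[i].name);
            }
            printf("gang leader:  %s\n", leader->name);
            return 0;
    }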