Re: [PATCH] drm/amdgpu: use the last IB as gang leader v2

I can confirm that this patch fixes an issue with gang submit:
it removes the need to sort IBs by IP type in user space.
Now only the IP type of the last IB matters, as intended.
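
To illustrate the new behavior, here is a minimal, compilable user-space
sketch (not the kernel code; the struct and helper names below are made
up and heavily simplified, see amdgpu_cs.c for the real implementation).
The parser just remembers the slot of the last IB it parses, and the
sync loop skips the leader by comparing job pointers instead of assuming
the leader occupies the last slot:

/*
 * Hypothetical sketch of gang leader selection, for illustration only.
 */
#include <stdio.h>

#define GANG_SIZE 4

struct job {
    const char *ip_type;
};

struct parser {
    unsigned int gang_size;
    unsigned int gang_leader_idx;
    struct job *jobs[GANG_SIZE];
};

/* Called once per IB chunk in submission order; slot identifies the
 * job the IB was routed to (one job per IP type in a gang). */
static void record_ib(struct parser *p, unsigned int slot)
{
    /* The job owning the last IB parsed becomes the gang leader. */
    p->gang_leader_idx = slot;
}

static void sync_rings(struct parser *p)
{
    struct job *leader = p->jobs[p->gang_leader_idx];
    unsigned int i;

    /* Walk all jobs and skip the leader by identity, rather than
     * assuming the leader sits in the last slot. */
    for (i = 0; i < p->gang_size; ++i) {
        if (p->jobs[i] == leader)
            continue;
        printf("sync %s against leader %s\n",
               p->jobs[i]->ip_type, leader->ip_type);
    }
}

int main(void)
{
    struct job gfx = { "gfx" }, compute = { "compute" };
    struct parser p = { .gang_size = 2, .jobs = { &gfx, &compute } };

    record_ib(&p, 1);   /* compute IB first...        */
    record_ib(&p, 0);   /* ...gfx IB last: gfx leads. */

    sync_rings(&p);
    return 0;
}

Comparing pointers rather than iterating up to gang_size - 1 keeps the
loops correct even when the leader is not the last job allocated.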

Tested-by: Timur Kristóf <timur.kristof@xxxxxxxxx>
Acked-by: Timur Kristóf <timur.kristof@xxxxxxxxx>

On Tue, 2022-11-15 at 10:43 +0100, Christian König wrote:
> Am 15.11.22 um 10:42 schrieb Christian König:
> > It turned out that the gang leader is not the last IB specified,
> > but rather the last job allocated.
> > 
> > This is unfortunate and not very intuitive for the CS interface,
> > so fix it.
> 
> Alex, could you take a look at this? I would really like to get this
> into the next -rc.
> 
> Thanks,
> Christian.
> 
> > 
> > Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 ++++++++++++++++-------
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h |  1 +
> >   2 files changed, 17 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index 1bbd39b3b0fc..fbdf139cf497 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
> >                 return r;
> >   
> >         ++(num_ibs[r]);
> > +       p->gang_leader_idx = r;
> >         return 0;
> >   }
> >   
> > @@ -300,7 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
> >                 if (ret)
> >                         goto free_all_kdata;
> >         }
> > -       p->gang_leader = p->jobs[p->gang_size - 1];
> > +       p->gang_leader = p->jobs[p->gang_leader_idx];
> >   
> >         if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
> >                 ret = -ECANCELED;
> > @@ -1194,16 +1195,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
> >                         return r;
> >         }
> >   
> > -       for (i = 0; i < p->gang_size - 1; ++i) {
> > +       for (i = 0; i < p->gang_size; ++i) {
> > +               if (p->jobs[i] == leader)
> > +                       continue;
> > +
> >                 r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
> >                 if (r)
> >                         return r;
> >         }
> >   
> > -       r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
> > +       r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
> >         if (r && r != -ERESTARTSYS)
> >                 DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
> > -
> >         return r;
> >   }
> >   
> > @@ -1237,9 +1240,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >         for (i = 0; i < p->gang_size; ++i)
> >                 drm_sched_job_arm(&p->jobs[i]->base);
> >   
> > -       for (i = 0; i < (p->gang_size - 1); ++i) {
> > +       for (i = 0; i < p->gang_size; ++i) {
> >                 struct dma_fence *fence;
> >   
> > +               if (p->jobs[i] == leader)
> > +                       continue;
> > +
> >                 fence = &p->jobs[i]->base.s_fence->scheduled;
> >                 r = amdgpu_sync_fence(&leader->sync, fence);
> >                 if (r)
> > @@ -1275,7 +1281,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >         list_for_each_entry(e, &p->validated, tv.head) {
> >   
> >                 /* Everybody except for the gang leader uses READ */
> > -               for (i = 0; i < (p->gang_size - 1); ++i) {
> > +               for (i = 0; i < p->gang_size; ++i) {
> > +                       if (p->jobs[i] == leader)
> > +                               continue;
> > +
> >                         dma_resv_add_fence(e->tv.bo->base.resv,
> >                                            &p->jobs[i]->base.s_fence->finished,
> >                                            DMA_RESV_USAGE_READ);
> > @@ -1285,7 +1294,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >                 e->tv.num_shared = 0;
> >         }
> >   
> > -       seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
> > +       seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
> >                                    p->fence);
> >         amdgpu_cs_post_dependencies(p);
> >   
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
> > index cbaa19b2b8a3..f80adf9069ec 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
> > @@ -54,6 +54,7 @@ struct amdgpu_cs_parser {
> >   
> >         /* scheduler job objects */
> >         unsigned int            gang_size;
> > +       unsigned int            gang_leader_idx;
> >         struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
> >         struct amdgpu_job       *jobs[AMDGPU_CS_GANG_SIZE];
> >         struct amdgpu_job       *gang_leader;
> 