Am 17.07.19 um 16:27 schrieb Marek Olšák:
Am 17.07.19 um 02:06 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@xxxxxxx>
>
> Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
> separate GDS buffers for each gfx ring.
>
> This is a workaround to ensure stability of transform feedback. Shaders hang
> waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>
> The disadvantage is that compute IBs might get a different VMID,
> because now gfx always has GDS and compute doesn't.
So far compute is only using GWS, but I don't think that those
reservations are a good idea in general.
How severe is the ENOMEM problem you see with using an explicit GWS
allocation?
I guess you mean GDS or OA.
Yeah, just a typo. Compute is using GWS and we want to use GDS and
OA here.
There is no ENOMEM, it just hangs. I don't know why. The shader is
waiting for ds_sub and can't continue, but GDS is idle.
Well, could it be because we don't correctly handle non-zero offsets
or stuff like that?
Does it work with this hack when you don't allocate GDS/OA from the
start? (Just allocate it twice or something like this).
Christian.
Marek
Regards,
Christian.
>
> Signed-off-by: Marek Olšák <marek.olsak@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
>  4 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4b514a44184c..cbd55d061b72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
> struct drm_file *filp;
> struct amdgpu_ctx *ctx;
>
> /* chunks */
> unsigned nchunks;
> struct amdgpu_cs_chunk *chunks;
>
> /* scheduler job object */
> struct amdgpu_job *job;
> struct drm_sched_entity *entity;
> + unsigned hw_ip;
>
> /* buffer objects */
> struct ww_acquire_ctx ticket;
> struct amdgpu_bo_list *bo_list;
> struct amdgpu_mn *mn;
> struct amdgpu_bo_list_entry vm_pd;
> struct list_head validated;
> struct dma_fence *fence;
>  	uint64_t			bytes_moved_threshold;
>  	uint64_t			bytes_moved_vis_threshold;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c691df6f7a57..9125cd69a124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> if (r)
> goto error_validate;
>
>  	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>  				     p->bytes_moved_vis);
>
> gds = p->bo_list->gds_obj;
> gws = p->bo_list->gws_obj;
> oa = p->bo_list->oa_obj;
>
> +	if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> +		/* Only gfx10 allocates these. */
> +		if (!gds)
> +			gds = p->adev->gds.gds_gfx_bo;
> +		if (!oa)
> +			oa = p->adev->gds.oa_gfx_bo;
> +	}
> +
>  	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>  		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> 
>  		/* Make sure we use the exclusive slot for shared BOs */
>  		if (bo->prime_shared_count)
>  			e->tv.num_shared = 0;
>  		e->bo_va = amdgpu_vm_bo_find(vm, bo);
>  	}
>
> if (gds) {
> @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>  		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>  		struct drm_sched_entity *entity;
> 
>  		chunk = &parser->chunks[i];
>  		ib = &parser->job->ibs[j];
>  		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
> 
>  		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>  			continue;
> 
> +		parser->hw_ip = chunk_ib->ip_type;
> +
>  		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>  		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>  			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
>  				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
>  					ce_preempt++;
>  				else
>  					de_preempt++;
>  			}
> 
>  			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> index df8a23554831..0943b8e1d97e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> @@ -26,20 +26,26 @@
>
> struct amdgpu_ring;
> struct amdgpu_bo;
>
> struct amdgpu_gds {
> uint32_t gds_size;
> uint32_t gws_size;
> uint32_t oa_size;
> uint32_t gds_compute_max_wave_id;
> uint32_t vgt_gs_max_wave_id;
> +
> + /* Reserved OA for the gfx ring. (gfx10) */
> + uint32_t gds_gfx_reservation_size;
> + uint32_t oa_gfx_reservation_size;
> + struct amdgpu_bo *gds_gfx_bo;
> + struct amdgpu_bo *oa_gfx_bo;
> };
>
> struct amdgpu_gds_reg_offset {
> uint32_t mem_base;
> uint32_t mem_size;
> uint32_t gws;
> uint32_t oa;
> };
>
> #endif /* __AMDGPU_GDS_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 618291df659b..3952754c04ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>
>  	kiq = &adev->gfx.kiq;
>  	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>  	if (r)
>  		return r;
> 
>  	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
>  	if (r)
>  		return r;
>
> +	/* allocate reserved GDS resources for transform feedback */
> +	r = amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size,
> +				    4, AMDGPU_GEM_DOMAIN_GDS,
> +				    &adev->gds.gds_gfx_bo, NULL, NULL);
> +	if (r)
> +		return r;
> +
> +	r = amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size,
> +				    1, AMDGPU_GEM_DOMAIN_OA,
> +				    &adev->gds.oa_gfx_bo, NULL, NULL);
> +	if (r)
> +		return r;
> +
>  	/* allocate visible FB for rlc auto-loading fw */
>  	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
>  		r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
>  		if (r)
>  			return r;
>  	}
>
>  	adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
> 
>  	gfx_v10_0_gpu_early_init(adev);
> @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
>  	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
>  			      &adev->gfx.me.me_fw_gpu_addr,
>  			      (void **)&adev->gfx.me.me_fw_ptr);
>  }
>
>  static int gfx_v10_0_sw_fini(void *handle)
>  {
>  	int i;
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
> +	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> +	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> +
>  	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>  		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>  	for (i = 0; i < adev->gfx.num_compute_rings; i++)
>  		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
> 
>  	amdgpu_gfx_mqd_sw_fini(adev);
>  	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>  	amdgpu_gfx_kiq_fini(adev);
> 
>  	gfx_v10_0_pfp_fini(adev);
> @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
>  	case CHIP_NAVI10:
>  	default:
>  		adev->gds.gds_size = 0x10000;
>  		adev->gds.gds_compute_max_wave_id = 0x4ff;
>  		adev->gds.vgt_gs_max_wave_id = 0x3ff;
>  		break;
>  	}
> 
>  	adev->gds.gws_size = 64;
>  	adev->gds.oa_size = 16;
> +
> +	/* Reserved for transform feedback. */
> +	adev->gds.gds_gfx_reservation_size = 256;
> +	adev->gds.oa_gfx_reservation_size = 4;
>  }
>
>  static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
>  							   u32 bitmap)
>  {
>  	u32 data;
> 
>  	if (!bitmap)
>  		return;
>
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx