On Wed., Jul. 17, 2019, 03:15 Christian König, <ckoenig.leichtzumerken@xxxxxxxxx> wrote:
On 17.07.19 at 02:06, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak@xxxxxxx>
>
> Hopefully we'll only use 1 gfx ring, because otherwise we would need
> separate GDS buffers for each gfx ring.
>
> This is a workaround to ensure stability of transform feedback. Shaders hang
> waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>
> The disadvantage is that compute IBs might get a different VMID,
> because now gfx always has GDS and compute doesn't.
So far compute is only using GWS, but I don't think those
reservations are a good idea in general.
How severe is the ENOMEM problem you see with using an explicit GWS
allocation?
I guess you mean GDS or OA.
There is no ENOMEM; it just hangs. I don't know why. The shader is waiting for ds_sub and can't continue, but GDS is idle.
Marek
Regards,
Christian.
>
> Signed-off-by: Marek Olšák <marek.olsak@xxxxxxx>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 6 ++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 20 ++++++++++++++++++++
> 4 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4b514a44184c..cbd55d061b72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
> struct drm_file *filp;
> struct amdgpu_ctx *ctx;
>
> /* chunks */
> unsigned nchunks;
> struct amdgpu_cs_chunk *chunks;
>
> /* scheduler job object */
> struct amdgpu_job *job;
> struct drm_sched_entity *entity;
> + unsigned hw_ip;
>
> /* buffer objects */
> struct ww_acquire_ctx ticket;
> struct amdgpu_bo_list *bo_list;
> struct amdgpu_mn *mn;
> struct amdgpu_bo_list_entry vm_pd;
> struct list_head validated;
> struct dma_fence *fence;
> uint64_t bytes_moved_threshold;
> uint64_t bytes_moved_vis_threshold;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c691df6f7a57..9125cd69a124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> if (r)
> goto error_validate;
>
> amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
> p->bytes_moved_vis);
>
> gds = p->bo_list->gds_obj;
> gws = p->bo_list->gws_obj;
> oa = p->bo_list->oa_obj;
>
> + if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> + /* Only gfx10 allocates these. */
> + if (!gds)
> + gds = p->adev->gds.gds_gfx_bo;
> + if (!oa)
> + oa = p->adev->gds.oa_gfx_bo;
> + }
> +
> amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>
> /* Make sure we use the exclusive slot for shared BOs */
> if (bo->prime_shared_count)
> e->tv.num_shared = 0;
> e->bo_va = amdgpu_vm_bo_find(vm, bo);
> }
>
> if (gds) {
> @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> struct drm_amdgpu_cs_chunk_ib *chunk_ib;
> struct drm_sched_entity *entity;
>
> chunk = &parser->chunks[i];
> ib = &parser->job->ibs[j];
> chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>
> if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> continue;
>
> + parser->hw_ip = chunk_ib->ip_type;
> +
> if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
> (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
> if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
> if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
> ce_preempt++;
> else
> de_preempt++;
> }
>
> /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> index df8a23554831..0943b8e1d97e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> @@ -26,20 +26,26 @@
>
> struct amdgpu_ring;
> struct amdgpu_bo;
>
> struct amdgpu_gds {
> uint32_t gds_size;
> uint32_t gws_size;
> uint32_t oa_size;
> uint32_t gds_compute_max_wave_id;
> uint32_t vgt_gs_max_wave_id;
> +
> + /* Reserved GDS and OA for the gfx ring (gfx10). */
> + uint32_t gds_gfx_reservation_size;
> + uint32_t oa_gfx_reservation_size;
> + struct amdgpu_bo *gds_gfx_bo;
> + struct amdgpu_bo *oa_gfx_bo;
> };
>
> struct amdgpu_gds_reg_offset {
> uint32_t mem_base;
> uint32_t mem_size;
> uint32_t gws;
> uint32_t oa;
> };
>
> #endif /* __AMDGPU_GDS_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 618291df659b..3952754c04ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>
> kiq = &adev->gfx.kiq;
> r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
> if (r)
> return r;
>
> r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
> if (r)
> return r;
>
> + /* allocate reserved GDS and OA resources for transform feedback */
> + r = amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size,
> + 4, AMDGPU_GEM_DOMAIN_GDS,
> + &adev->gds.gds_gfx_bo, NULL, NULL);
> + if (r)
> + return r;
> +
> + r = amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size,
> + 1, AMDGPU_GEM_DOMAIN_OA,
> + &adev->gds.oa_gfx_bo, NULL, NULL);
> + if (r)
> + return r;
> +
> /* allocate visible FB for rlc auto-loading fw */
> if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
> r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
> if (r)
> return r;
> }
>
> adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
>
> gfx_v10_0_gpu_early_init(adev);
> @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
> amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
> &adev->gfx.me.me_fw_gpu_addr,
> (void **)&adev->gfx.me.me_fw_ptr);
> }
>
> static int gfx_v10_0_sw_fini(void *handle)
> {
> int i;
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> + amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> + amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> +
> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
> for (i = 0; i < adev->gfx.num_compute_rings; i++)
> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>
> amdgpu_gfx_mqd_sw_fini(adev);
> amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
> amdgpu_gfx_kiq_fini(adev);
>
> gfx_v10_0_pfp_fini(adev);
> @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
> case CHIP_NAVI10:
> default:
> adev->gds.gds_size = 0x10000;
> adev->gds.gds_compute_max_wave_id = 0x4ff;
> adev->gds.vgt_gs_max_wave_id = 0x3ff;
> break;
> }
>
> adev->gds.gws_size = 64;
> adev->gds.oa_size = 16;
> +
> + /* Reserved for transform feedback. */
> + adev->gds.gds_gfx_reservation_size = 256;
> + adev->gds.oa_gfx_reservation_size = 4;
> }
>
> static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
> u32 bitmap)
> {
> u32 data;
>
> if (!bitmap)
> return;
>
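Condensed view of the patch, for orientation: it touches four places, and the sketch below is assembled from the hunks quoted above using only the names the patch introduces. Error handling and the unrelated init/fini work are elided, so treat it as an illustration of how the pieces fit together rather than buildable code.

	/* (1) gfx_v10_0_set_gds_init(): pick the reservation sizes (gfx10 only). */
	adev->gds.gds_gfx_reservation_size = 256; /* bytes of GDS */
	adev->gds.oa_gfx_reservation_size = 4;    /* OA entries */

	/* (2) gfx_v10_0_sw_init(): allocate one GDS BO and one OA BO per device,
	 *     shared by the (hopefully single) gfx ring. */
	amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size, 4,
				AMDGPU_GEM_DOMAIN_GDS,
				&adev->gds.gds_gfx_bo, NULL, NULL);
	amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size, 1,
				AMDGPU_GEM_DOMAIN_OA,
				&adev->gds.oa_gfx_bo, NULL, NULL);

	/* (3) amdgpu_cs_parser_bos(): a gfx submission that attached no GDS/OA
	 *     BO of its own falls back to the reserved ones, so gfx jobs always
	 *     carry a GDS allocation (the workaround for the ds_sub hang). */
	if (p->hw_ip == AMDGPU_HW_IP_GFX) {
		if (!gds)
			gds = p->adev->gds.gds_gfx_bo;
		if (!oa)
			oa = p->adev->gds.oa_gfx_bo;
	}

	/* (4) gfx_v10_0_sw_fini(): free the reservations again. */
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);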