Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 17.07.19 um 16:27 schrieb Marek Olšák:


On Wed., Jul. 17, 2019, 03:15 Christian König, <ckoenig.leichtzumerken@xxxxxxxxx> wrote:
Am 17.07.19 um 02:06 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@xxxxxxx>
>
> Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
> separate GDS buffers for each gfx ring.
>
> This is a workaround to ensure stability of transform feedback. Shaders hang
> waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>
> The disadvantage is that compute IBs might get a different VMID,
> because now gfx always has GDS and compute doesn't.

So far compute is only using GWS, but I don't think that those
reservations are a good idea in general.

How severe is the ENOMEM problem you see with using an explicit GWS
allocation?

I guess you mean GDS or OA.

Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here.

There is no ENOMEM, it just hangs. I don't know why. The shader is waiting for ds_sub and can't continue, but GDS is idle.

Well could it be because we don't correctly handle non zero offsets or stuff like that?

Does it work with this hack when you don't allocate GDS/OA from the start? (Just allocate it twice or something like this).

Christian.


Marek


Regards,
Christian.

>
> Signed-off-by: Marek Olšák <marek.olsak@xxxxxxx>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
>   4 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4b514a44184c..cbd55d061b72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>       struct drm_file         *filp;
>       struct amdgpu_ctx       *ctx;
>   
>       /* chunks */
>       unsigned                nchunks;
>       struct amdgpu_cs_chunk  *chunks;
>   
>       /* scheduler job object */
>       struct amdgpu_job       *job;
>       struct drm_sched_entity *entity;
> +     unsigned                hw_ip;
>   
>       /* buffer objects */
>       struct ww_acquire_ctx           ticket;
>       struct amdgpu_bo_list           *bo_list;
>       struct amdgpu_mn                *mn;
>       struct amdgpu_bo_list_entry     vm_pd;
>       struct list_head                validated;
>       struct dma_fence                *fence;
>       uint64_t                        bytes_moved_threshold;
>       uint64_t                        bytes_moved_vis_threshold;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c691df6f7a57..9125cd69a124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>       if (r)
>               goto error_validate;
>   
>       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>                                    p->bytes_moved_vis);
>   
>       gds = p->bo_list->gds_obj;
>       gws = p->bo_list->gws_obj;
>       oa = p->bo_list->oa_obj;
>   
> +     if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> +             /* Only gfx10 allocates these. */
> +             if (!gds)
> +                     gds = p->adev->gds.gds_gfx_bo;
> +             if (!oa)
> +                     oa = p->adev->gds.oa_gfx_bo;
> +     }
> +
>       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>   
>               /* Make sure we use the exclusive slot for shared BOs */
>               if (bo->prime_shared_count)
>                       e->tv.num_shared = 0;
>               e->bo_va = amdgpu_vm_bo_find(vm, bo);
>       }
>   
>       if (gds) {
> @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>               struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>               struct drm_sched_entity *entity;
>   
>               chunk = &parser->chunks[i];
>               ib = &parser->job->ibs[j];
>               chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>   
>               if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>                       continue;
>   
> +             parser->hw_ip = chunk_ib->ip_type;
> +
>               if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>                   (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>                       if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
>                               if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
>                                       ce_preempt++;
>                               else
>                                       de_preempt++;
>                       }
>   
>                       /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> index df8a23554831..0943b8e1d97e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> @@ -26,20 +26,26 @@
>   
>   struct amdgpu_ring;
>   struct amdgpu_bo;
>   
>   struct amdgpu_gds {
>       uint32_t gds_size;
>       uint32_t gws_size;
>       uint32_t oa_size;
>       uint32_t gds_compute_max_wave_id;
>       uint32_t vgt_gs_max_wave_id;
> +
> +     /* Reserved OA for the gfx ring. (gfx10) */
> +     uint32_t gds_gfx_reservation_size;
> +     uint32_t oa_gfx_reservation_size;
> +     struct amdgpu_bo *gds_gfx_bo;
> +     struct amdgpu_bo *oa_gfx_bo;
>   };
>   
>   struct amdgpu_gds_reg_offset {
>       uint32_t        mem_base;
>       uint32_t        mem_size;
>       uint32_t        gws;
>       uint32_t        oa;
>   };
>   
>   #endif /* __AMDGPU_GDS_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 618291df659b..3952754c04ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>   
>       kiq = &adev->gfx.kiq;
>       r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>       if (r)
>               return r;
>   
>       r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
>       if (r)
>               return r;
>   
> +     /* allocate reserved GDS resources for transform feedback */
> +     r = amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size,
> +                                 4, AMDGPU_GEM_DOMAIN_GDS,
> +                                 &adev->gds.gds_gfx_bo, NULL, NULL);
> +     if (r)
> +             return r;
> +
> +     r = amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size,
> +                                 1, AMDGPU_GEM_DOMAIN_OA,
> +                                 &adev->gds.oa_gfx_bo, NULL, NULL);
> +     if (r)
> +             return r;
> +
>       /* allocate visible FB for rlc auto-loading fw */
>       if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
>               r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
>               if (r)
>                       return r;
>       }
>   
>       adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
>   
>       gfx_v10_0_gpu_early_init(adev);
> @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
>       amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
>                             &adev->gfx.me.me_fw_gpu_addr,
>                             (void **)&adev->gfx.me.me_fw_ptr);
>   }
>   
>   static int gfx_v10_0_sw_fini(void *handle)
>   {
>       int i;
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +     amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> +     amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> +
>       for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>               amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>       for (i = 0; i < adev->gfx.num_compute_rings; i++)
>               amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>   
>       amdgpu_gfx_mqd_sw_fini(adev);
>       amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>       amdgpu_gfx_kiq_fini(adev);
>   
>       gfx_v10_0_pfp_fini(adev);
> @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
>       case CHIP_NAVI10:
>       default:
>               adev->gds.gds_size = 0x10000;
>               adev->gds.gds_compute_max_wave_id = 0x4ff;
>               adev->gds.vgt_gs_max_wave_id = 0x3ff;
>               break;
>       }
>   
>       adev->gds.gws_size = 64;
>       adev->gds.oa_size = 16;
> +
> +     /* Reserved for transform feedback. */
> +     adev->gds.gds_gfx_reservation_size = 256;
> +     adev->gds.oa_gfx_reservation_size = 4;
>   }
>   
>   static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
>                                                         u32 bitmap)
>   {
>       u32 data;
>   
>       if (!bitmap)
>               return;
>   


_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux