Am 17.07.19 um 16:27 schrieb Marek Olšák:
Am 17.07.19 um 02:06 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@xxxxxxx>
>
> Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
> separate GDS buffers for each gfx ring.
>
> This is a workaround to ensure stability of transform feedback. Shaders hang
> waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>
> The disadvantage is that compute IBs might get a different VMID,
> because now gfx always has GDS and compute doesn't.
So far compute is only using GWS, but I don't think that those
reservations are a good idea in general.
How severe is the ENOMEM problem you see with using an explicit GWS
allocation?
I guess you mean GDS or OA.
Yeah, just a typo. Compute is using GWS and we want to use GDS and
OA here.
There is no ENOMEM, it just hangs. I don't know why. The shader is
waiting for ds_sub and can't continue, but GDS is idle.
Well, could it be because we don't correctly handle non-zero offsets
or stuff like that?
Does it work with this hack when you don't allocate GDS/OA from the
start? (Just allocate it twice or something like this).
Christian.
Marek
Regards,
Christian.
>
> Signed-off-by: Marek Olšák <marek.olsak@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
>  4 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4b514a44184c..cbd55d061b72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
> struct drm_file *filp;
> struct amdgpu_ctx *ctx;
>
> /* chunks */
> unsigned nchunks;
> struct amdgpu_cs_chunk *chunks;
>
> /* scheduler job object */
> struct amdgpu_job *job;
> struct drm_sched_entity *entity;
> + unsigned hw_ip;
>
> /* buffer objects */
> struct ww_acquire_ctx ticket;
> struct amdgpu_bo_list *bo_list;
> struct amdgpu_mn *mn;
> struct amdgpu_bo_list_entry vm_pd;
> struct list_head validated;
> struct dma_fence *fence;
>  	uint64_t			bytes_moved_threshold;
>  	uint64_t			bytes_moved_vis_threshold;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c691df6f7a57..9125cd69a124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> if (r)
> goto error_validate;
>
>  	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>  				     p->bytes_moved_vis);
>
> gds = p->bo_list->gds_obj;
> gws = p->bo_list->gws_obj;
> oa = p->bo_list->oa_obj;
>
> +	if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> +		/* Only gfx10 allocates these. */
> +		if (!gds)
> +			gds = p->adev->gds.gds_gfx_bo;
> +		if (!oa)
> +			oa = p->adev->gds.oa_gfx_bo;
> +	}
> +
>  	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>  		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> 
>  		/* Make sure we use the exclusive slot for shared BOs */
>  		if (bo->prime_shared_count)
>  			e->tv.num_shared = 0;
>  		e->bo_va = amdgpu_vm_bo_find(vm, bo);
>  	}
>
> if (gds) {
> @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>  		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>  		struct drm_sched_entity *entity;
> 
>  		chunk = &parser->chunks[i];
>  		ib = &parser->job->ibs[j];
>  		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
> 
>  		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>  			continue;
> 
> +		parser->hw_ip = chunk_ib->ip_type;
> +
>  		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>  		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>  			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
>  				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
>  					ce_preempt++;
>  				else
>  					de_preempt++;
>  			}
> 
>  			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> index df8a23554831..0943b8e1d97e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> @@ -26,20 +26,26 @@
>
> struct amdgpu_ring;
> struct amdgpu_bo;
>
> struct amdgpu_gds {
> uint32_t gds_size;
> uint32_t gws_size;
> uint32_t oa_size;
> uint32_t gds_compute_max_wave_id;
> uint32_t vgt_gs_max_wave_id;
> +
> + /* Reserved OA for the gfx ring. (gfx10) */
> + uint32_t gds_gfx_reservation_size;
> + uint32_t oa_gfx_reservation_size;
> + struct amdgpu_bo *gds_gfx_bo;
> + struct amdgpu_bo *oa_gfx_bo;
> };
>
> struct amdgpu_gds_reg_offset {
> uint32_t mem_base;
> uint32_t mem_size;
> uint32_t gws;
> uint32_t oa;
> };
>
> #endif /* __AMDGPU_GDS_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 618291df659b..3952754c04ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>
>  	kiq = &adev->gfx.kiq;
>  	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>  	if (r)
>  		return r;
> 
>  	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
>  	if (r)
>  		return r;
>
> +	/* allocate reserved GDS resources for transform feedback */
> +	r = amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size,
> +				    4, AMDGPU_GEM_DOMAIN_GDS,
> +				    &adev->gds.gds_gfx_bo, NULL, NULL);
> +	if (r)
> +		return r;
> +
> +	r = amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size,
> +				    1, AMDGPU_GEM_DOMAIN_OA,
> +				    &adev->gds.oa_gfx_bo, NULL, NULL);
> +	if (r)
> +		return r;
> +
>  	/* allocate visible FB for rlc auto-loading fw */
>  	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
>  		r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
>  		if (r)
>  			return r;
>  	}
>
>  	adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
> 
>  	gfx_v10_0_gpu_early_init(adev);
> @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
>  	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
>  			      &adev->gfx.me.me_fw_gpu_addr,
>  			      (void **)&adev->gfx.me.me_fw_ptr);
>  }
>
>  static int gfx_v10_0_sw_fini(void *handle)
>  {
>  	int i;
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
> +	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> +	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> +
>  	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>  		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>  	for (i = 0; i < adev->gfx.num_compute_rings; i++)
>  		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
> 
>  	amdgpu_gfx_mqd_sw_fini(adev);
>  	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>  	amdgpu_gfx_kiq_fini(adev);
> 
>  	gfx_v10_0_pfp_fini(adev);
> @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
>  	case CHIP_NAVI10:
>  	default:
>  		adev->gds.gds_size = 0x10000;
>  		adev->gds.gds_compute_max_wave_id = 0x4ff;
>  		adev->gds.vgt_gs_max_wave_id = 0x3ff;
>  		break;
>  	}
> 
>  	adev->gds.gws_size = 64;
>  	adev->gds.oa_size = 16;
> +
> +	/* Reserved for transform feedback. */
> +	adev->gds.gds_gfx_reservation_size = 256;
> +	adev->gds.oa_gfx_reservation_size = 4;
>  }
>
>  static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
>  							   u32 bitmap)
>  {
>  	u32 data;
> 
>  	if (!bitmap)
>  		return;
>
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx