From: Marek Olšák <marek.olsak@xxxxxxx> Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have separate GDS buffers for each gfx ring. This is a workaround to ensure stability of transform feedback. Shaders hang waiting for a GDS instruction (ds_sub, not ds_ordered_count). The disadvantage is that compute IBs might get a different VMID, because now gfx always has GDS and compute doesn't. Signed-off-by: Marek Olšák <marek.olsak@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 6 ++++++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 20 ++++++++++++++++++++ 4 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4b514a44184c..cbd55d061b72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -456,20 +456,21 @@ struct amdgpu_cs_parser { struct drm_file *filp; struct amdgpu_ctx *ctx; /* chunks */ unsigned nchunks; struct amdgpu_cs_chunk *chunks; /* scheduler job object */ struct amdgpu_job *job; struct drm_sched_entity *entity; + unsigned hw_ip; /* buffer objects */ struct ww_acquire_ctx ticket; struct amdgpu_bo_list *bo_list; struct amdgpu_mn *mn; struct amdgpu_bo_list_entry vm_pd; struct list_head validated; struct dma_fence *fence; uint64_t bytes_moved_threshold; uint64_t bytes_moved_vis_threshold; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c691df6f7a57..9125cd69a124 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (r) goto error_validate; amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); gds = p->bo_list->gds_obj; gws = p->bo_list->gws_obj; oa = p->bo_list->oa_obj; + if (p->hw_ip == AMDGPU_HW_IP_GFX) { + /* Only gfx10 allocates these. */ + if (!gds) + gds = p->adev->gds.gds_gfx_bo; + if (!oa) + oa = p->adev->gds.oa_gfx_bo; + } + amdgpu_bo_list_for_each_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); /* Make sure we use the exclusive slot for shared BOs */ if (bo->prime_shared_count) e->tv.num_shared = 0; e->bo_va = amdgpu_vm_bo_find(vm, bo); } if (gds) { @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, struct drm_amdgpu_cs_chunk_ib *chunk_ib; struct drm_sched_entity *entity; chunk = &parser->chunks[i]; ib = &parser->job->ibs[j]; chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) continue; + parser->hw_ip = chunk_ib->ip_type; + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && (amdgpu_mcbp || amdgpu_sriov_vf(adev))) { if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) ce_preempt++; else de_preempt++; } /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index df8a23554831..0943b8e1d97e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -26,20 +26,26 @@ struct amdgpu_ring; struct amdgpu_bo; struct amdgpu_gds { uint32_t gds_size; uint32_t gws_size; uint32_t oa_size; uint32_t gds_compute_max_wave_id; uint32_t vgt_gs_max_wave_id; + + /* Reserved OA for the gfx ring. (gfx10) */ + uint32_t gds_gfx_reservation_size; + uint32_t oa_gfx_reservation_size; + struct amdgpu_bo *gds_gfx_bo; + struct amdgpu_bo *oa_gfx_bo; }; struct amdgpu_gds_reg_offset { uint32_t mem_base; uint32_t mem_size; uint32_t gws; uint32_t oa; }; #endif /* __AMDGPU_GDS_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 618291df659b..3952754c04ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle) kiq = &adev->gfx.kiq; r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); if (r) return r; r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd)); if (r) return r; + /* allocate reserved GDS resources for transform feedback */ + r = amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size, + 4, AMDGPU_GEM_DOMAIN_GDS, + &adev->gds.gds_gfx_bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size, + 1, AMDGPU_GEM_DOMAIN_OA, + &adev->gds.oa_gfx_bo, NULL, NULL); + if (r) + return r; + /* allocate visible FB for rlc auto-loading fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev); if (r) return r; } adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE; gfx_v10_0_gpu_early_init(adev); @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev) amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, &adev->gfx.me.me_fw_gpu_addr, (void **)&adev->gfx.me.me_fw_ptr); } static int gfx_v10_0_sw_fini(void *handle) { int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); + for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_mqd_sw_fini(adev); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); gfx_v10_0_pfp_fini(adev); @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev) case CHIP_NAVI10: default: adev->gds.gds_size = 0x10000; adev->gds.gds_compute_max_wave_id = 0x4ff; adev->gds.vgt_gs_max_wave_id = 0x3ff; break; } adev->gds.gws_size = 64; adev->gds.oa_size = 16; + + /* Reserved for transform feedback. */ + adev->gds.gds_gfx_reservation_size = 256; + adev->gds.oa_gfx_reservation_size = 4; } static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, u32 bitmap) { u32 data; if (!bitmap) return; -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx