[AMD Official Use Only - Internal Distribution Only] I think each kiq operation already calls ring_alloc for the packet space, so why not just check whether that allocation succeeds? Shaoyun.liu -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Yintian Tao Sent: Wednesday, April 22, 2020 10:50 AM To: Koenig, Christian <Christian.Koenig@xxxxxxx>; Liu, Monk <Monk.Liu@xxxxxxx>; Kuehling, Felix <Felix.Kuehling@xxxxxxx> Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Tao, Yintian <Yintian.Tao@xxxxxxx> Subject: [PATCH] drm/amdgpu: protect kiq overrun Wait for the oldest sequence on the kiq ring to be signaled in order to make sure there will be no kiq overrun. v2: remove the unused variable and correct the kiq max_sub_num value Signed-off-by: Yintian Tao <yttao@xxxxxxx> --- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 6 ++++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 ++++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 +++++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 +++++ 8 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..fac8b9713dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! 
too many kiq submission\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..fd42c126510f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! too many kiq submissions\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..84e66c45df37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, AMDGPU_RING_PRIO_DEFAULT); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + else + kiq->max_sub_num = (ring->ring_size / 4) / + (ring->funcs->align_mask + 1); return r; } @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) { + uint32_t seq = 0; + signed long r = 0; + + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num); + if (seq > kiq->max_sub_num) { + r = amdgpu_fence_wait_polling(&kiq->ring, seq, + MAX_KIQ_REG_WAIT); + return r < 1 ? 
-ETIME : 0; + } + + return 0; +} + uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; @@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); @@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_write; + } + amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit_polling(ring, &seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ee698f0246d8..1ee59a927bd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -103,6 +103,7 @@ struct amdgpu_kiq { struct amdgpu_ring ring; struct amdgpu_irq_src irq; const struct kiq_pm4_funcs *pmf; + uint32_t max_sub_num; }; /* @@ -387,4 +388,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); + +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8c10084f44ef..4b027006d072 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -57,6 +57,12 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device 
*adev, uint32_t seq; spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq; + } + amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5b1549f167b0..a136e2229f7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4051,6 +4051,12 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 30b75d79efdb..77d8bc9c0111 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -423,6 +423,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (amdgpu_emu_mode == 0 && ring->sched.ready) { spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock(&kiq->ring_lock); + DRM_ERROR("too many kiq submissions\n"); + return -ETIME; + } + /* 2 dwords flush + 8 dwords fence */ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); kiq->pmf->kiq_invalidate_tlbs(ring, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index fecdbc471983..c429a2a5fe3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -614,6 +614,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, ndw += kiq->pmf->invalidate_tlbs_size; 
spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock(&kiq->ring_lock); + DRM_ERROR("critical bug! too many kiq submissions\n"); + return -ETIME; + } + /* 2 dwords flush + 8 dwords fence */ amdgpu_ring_alloc(ring, ndw); if (vega20_xgmi_wa) -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7CShaoyun.Liu%40amd.com%7C0cd26307c0a149ebe0bd08d7e6cc84b1%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637231638419034830&sdata=rxkhyMutFwbe5Nw%2BeBGiESW9wTdflDUo%2F4xEvbCbR6U%3D&reserved=0 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx