Hi Christian Thanks. I will remove the initialization of r. Best Regards Yintian Tao -----Original Message----- From: Christian König <ckoenig.leichtzumerken@xxxxxxxxx> Sent: 2020年4月23日 20:22 To: Tao, Yintian <Yintian.Tao@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>; Liu, Monk <Monk.Liu@xxxxxxx>; Liu, Shaoyun <Shaoyun.Liu@xxxxxxx> Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Subject: Re: [PATCH] drm/amdgpu: protect ring overrun Am 23.04.20 um 11:06 schrieb Yintian Tao: > Wait for the oldest sequence on the ring to be signaled in order to > make sure there will be no command overrun. > > v2: fix coding style and remove abs operation One nitpick below, with that fixed the patch is Reviewed-by: Christian König <christian.koenig@xxxxxxx> > > Signed-off-by: Yintian Tao <yttao@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++++++++++++++++++---- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++++++- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 - > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 - > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++++++++++--- > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +++++++- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++++- > 9 files changed, 61 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > index 7531527067df..397bd5fa77cb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > @@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, > * Used For polling fence. > * Returns 0 on success, -ENOMEM on failure. 
> */ > -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) > +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, > + uint32_t timeout) > { > uint32_t seq; > + signed long r = 0; Please drop the initialization of r here. That is usually seen as rather bad style because it prevents the compiler from raising a warning when this really isn't initialized. Regards, Christian. > > if (!s) > return -EINVAL; > > seq = ++ring->fence_drv.sync_seq; > + r = amdgpu_fence_wait_polling(ring, > + seq - ring->fence_drv.num_fences_mask, > + timeout); > + if (r < 1) > + return -ETIMEDOUT; > + > amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, > seq, 0); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index a721b0e0ff69..0103acc57474 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device > *adev, uint32_t reg) > > spin_lock_irqsave(&kiq->ring_lock, flags); > if (amdgpu_device_wb_get(adev, &reg_val_offs)) { > - spin_unlock_irqrestore(&kiq->ring_lock, flags); > pr_err("critical bug! 
too many kiq readers\n"); > - goto failed_kiq_read; > + goto failed_unlock; > } > amdgpu_ring_alloc(ring, 32); > amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); > - amdgpu_fence_emit_polling(ring, &seq); > + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); > + if (r) > + goto failed_undo; > + > amdgpu_ring_commit(ring); > spin_unlock_irqrestore(&kiq->ring_lock, flags); > > @@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) > amdgpu_device_wb_free(adev, reg_val_offs); > return value; > > +failed_undo: > + amdgpu_ring_undo(ring); > +failed_unlock: > + spin_unlock_irqrestore(&kiq->ring_lock, flags); > failed_kiq_read: > + if (reg_val_offs) > + amdgpu_device_wb_free(adev, reg_val_offs); > pr_err("failed to read reg:%x\n", reg); > return ~0; > } > @@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) > spin_lock_irqsave(&kiq->ring_lock, flags); > amdgpu_ring_alloc(ring, 32); > amdgpu_ring_emit_wreg(ring, reg, v); > - amdgpu_fence_emit_polling(ring, &seq); > + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); > + if (r) > + goto failed_undo; > + > amdgpu_ring_commit(ring); > spin_unlock_irqrestore(&kiq->ring_lock, flags); > > @@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, > uint32_t reg, uint32_t v) > > return; > > +failed_undo: > + amdgpu_ring_undo(ring); > + spin_unlock_irqrestore(&kiq->ring_lock, flags); > failed_kiq_write: > pr_err("failed to write reg:%x\n", reg); > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 137d3d2b46e8..be218754629a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -118,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); > void amdgpu_fence_driver_resume(struct amdgpu_device *adev); > int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, > unsigned flags); > 
-int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); > +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, > + uint32_t timeout); > bool amdgpu_fence_process(struct amdgpu_ring *ring); > int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); > signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, diff > --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > index 8c10084f44ef..cbbb8d02535a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > @@ -60,7 +60,10 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, > amdgpu_ring_alloc(ring, 32); > amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, > ref, mask); > - amdgpu_fence_emit_polling(ring, &seq); > + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); > + if (r) > + goto failed_undo; > + > amdgpu_ring_commit(ring); > spin_unlock_irqrestore(&kiq->ring_lock, flags); > > @@ -82,6 +85,9 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct > amdgpu_device *adev, > > return; > > +failed_undo: > + amdgpu_ring_undo(ring); > + spin_unlock_irqrestore(&kiq->ring_lock, flags); > failed_kiq: > pr_err("failed to write reg %x wait reg %x\n", reg0, reg1); > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 8a579ce8757d..2b6d7687f4cb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -7605,7 +7605,6 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, > uint32_t reg_val_offs) > { > struct amdgpu_device *adev = ring->adev; > - struct amdgpu_kiq *kiq = &adev->gfx.kiq; > > amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); > amdgpu_ring_write(ring, 0 | /* src: register*/ > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 6c56cedce4c3..6ae78b9e9551 100644 > --- 
a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -6393,7 +6393,6 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, > uint32_t reg_val_offs) > { > struct amdgpu_device *adev = ring->adev; > - struct amdgpu_kiq *kiq = &adev->gfx.kiq; > > amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); > amdgpu_ring_write(ring, 0 | /* src: register*/ > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 09aa5f509bd2..be05ab221b16 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -4054,9 +4054,8 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct > amdgpu_device *adev) > > spin_lock_irqsave(&kiq->ring_lock, flags); > if (amdgpu_device_wb_get(adev, &reg_val_offs)) { > - spin_unlock_irqrestore(&kiq->ring_lock, flags); > pr_err("critical bug! too many kiq readers\n"); > - goto failed_kiq_read; > + goto failed_unlock; > } > amdgpu_ring_alloc(ring, 32); > amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); @@ -4070,7 > +4069,10 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) > reg_val_offs * 4)); > amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + > reg_val_offs * 4)); > - amdgpu_fence_emit_polling(ring, &seq); > + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); > + if (r) > + goto failed_undo; > + > amdgpu_ring_commit(ring); > spin_unlock_irqrestore(&kiq->ring_lock, flags); > > @@ -4102,7 +4104,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) > amdgpu_device_wb_free(adev, reg_val_offs); > return value; > > +failed_undo: > + amdgpu_ring_undo(ring); > +failed_unlock: > + spin_unlock_irqrestore(&kiq->ring_lock, flags); > failed_kiq_read: > + if (reg_val_offs) > + amdgpu_device_wb_free(adev, reg_val_offs); > pr_err("failed to read gpu clock\n"); > return ~0; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > index 30b75d79efdb..b120f9160f13 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > @@ -427,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); > kiq->pmf->kiq_invalidate_tlbs(ring, > pasid, flush_type, all_hub); > - amdgpu_fence_emit_polling(ring, &seq); > + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); > + if (r) { > + amdgpu_ring_undo(ring); > + spin_unlock(&kiq->ring_lock); > + return -ETIME; > + } > + > amdgpu_ring_commit(ring); > spin_unlock(&adev->gfx.kiq.ring_lock); > r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); diff > --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index fecdbc471983..0a6026308343 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -621,7 +621,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, > pasid, 2, all_hub); > kiq->pmf->kiq_invalidate_tlbs(ring, > pasid, flush_type, all_hub); > - amdgpu_fence_emit_polling(ring, &seq); > + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); > + if (r) { > + amdgpu_ring_undo(ring); > + spin_unlock(&kiq->ring_lock); > + return -ETIME; > + } > + > amdgpu_ring_commit(ring); > spin_unlock(&adev->gfx.kiq.ring_lock); > r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx