Am 17.10.2017 um 08:37 schrieb Pixel Ding: > From: pding <Pixel.Ding at amd.com> > > Register accessing is performed when IRQ is disabled. Never sleep in > this function. > > Known issue: dead sleep in many use cases of index/data registers. > > v2: wrap polling fence functions. don't trigger IRQ for polling in > case of wrongly fence signal. > > Signed-off-by: pding <Pixel.Ding at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +--- > drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 53 +++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 30 ++++++------- > 8 files changed, 78 insertions(+), 29 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index ca212ef..cc06e62 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -885,7 +885,7 @@ struct amdgpu_mec { > struct amdgpu_kiq { > u64 eop_gpu_addr; > struct amdgpu_bo *eop_obj; > - struct mutex ring_mutex; > + spinlock_t ring_lock; > struct amdgpu_ring ring; > struct amdgpu_irq_src irq; > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c > index 9d9965d..6d83573 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c > @@ -788,7 +788,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) > struct dma_fence *f; > struct amdgpu_ring *ring = &adev->gfx.kiq.ring; > > - mutex_lock(&adev->gfx.kiq.ring_mutex); > + spin_lock(&adev->gfx.kiq.ring_lock); > amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ > amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > amdgpu_ring_write(ring, > @@ -796,7 +796,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) > PACKET3_INVALIDATE_TLBS_PASID(pasid)); > amdgpu_fence_emit(ring, &f); > amdgpu_ring_commit(ring); > - mutex_unlock(&adev->gfx.kiq.ring_mutex); > + spin_unlock(&adev->gfx.kiq.ring_lock); > > r = dma_fence_wait(f, false); > if (r) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > index edbae19..c92217f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > @@ -973,7 +973,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) > struct dma_fence *f; > struct amdgpu_ring *ring = &adev->gfx.kiq.ring; > > - mutex_lock(&adev->gfx.kiq.ring_mutex); > + spin_lock(&adev->gfx.kiq.ring_lock); > amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ > amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); > amdgpu_ring_write(ring, > @@ -983,7 +983,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) > PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(2)); > amdgpu_fence_emit(ring, &f); > amdgpu_ring_commit(ring); > - mutex_unlock(&adev->gfx.kiq.ring_mutex); > + spin_unlock(&adev->gfx.kiq.ring_lock); > > r = dma_fence_wait(f, false); > if (r) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index ab8f0d6..1197274 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, > { > uint32_t ret; > > - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { > - BUG_ON(in_interrupt()); > + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) > return amdgpu_virt_kiq_rreg(adev, reg); > - } > > if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) > ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); > @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, > adev->last_mm_index = v; > } > > - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { > - BUG_ON(in_interrupt()); > + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) > return amdgpu_virt_kiq_wreg(adev, reg, v); > - } > > if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) > writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > index 688740e..68a5e90 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > @@ -169,6 +169,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) > } > > /** > + * amdgpu_fence_emit_polling - emit a fence on the requeste ring > + * > + * @ring: ring the fence is associated with > + * @s: resulting sequence number > + * > + * Emits a fence command on the requested ring (all asics). > + * Used For polling fence. > + * Returns 0 on success, -ENOMEM on failure. > + */ > +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) > +{ > + uint32_t seq; > + > + if (!s) > + return -EINVAL; > + > + seq = ++ring->fence_drv.sync_seq; > + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, > + seq, AMDGPU_FENCE_FLAG_INT); > + > + *s = seq; > + > + return 0; > +} > + > +/** > * amdgpu_fence_schedule_fallback - schedule fallback check > * > * @ring: pointer to struct amdgpu_ring > @@ -281,6 +307,33 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) > return r; > } > Please add some documentation here that timeout is in usec. > +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, > + uint32_t wait_seq, > + signed long timeout) > +{ > + uint32_t seq, last_seq; > + struct amdgpu_fence_driver *drv = &ring->fence_drv; > + > + last_seq = atomic_read(&ring->fence_drv.last_seq); > + > + do { > + seq = amdgpu_fence_read(ring); > + > + if (unlikely(seq == last_seq)) > + break; That doesn't look correct to me, it will abort the busy wait as soon as we see the last value we have seen. > + if (seq >= wait_seq && wait_seq >= last_seq) > + break; > + if (seq <= last_seq && > + (wait_seq <= seq || wait_seq >= last_seq)) > + break; Why not just "(int32_t)(wait_seq - seq) > 0" ? IIRC that should be sufficient for a wrap around test. Regards, Christian. > + udelay(5); > + timeout -= 5; > + } while (timeout > 0); > + > + atomic_cmpxchg(&drv->last_seq, last_seq, wait_seq); > + > + return timeout; > +} > /** > * amdgpu_fence_count_emitted - get the count of emitted fences > * > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index 4f6c68f..e5a9077 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -185,7 +185,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > int r = 0; > > - mutex_init(&kiq->ring_mutex); > + spin_lock_init(&kiq->ring_lock); > > r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); > if (r) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index af8e544..9de89ea 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -89,8 +89,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, > void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); > void amdgpu_fence_driver_resume(struct amdgpu_device *adev); > int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); > +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); > void amdgpu_fence_process(struct amdgpu_ring *ring); > int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); > +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, > + uint32_t wait_seq, > + signed long timeout); > unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); > > /* > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > index ab05121..177fe10 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > @@ -22,7 +22,7 @@ > */ > > #include "amdgpu.h" > -#define MAX_KIQ_REG_WAIT 100000 > +#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ > > int amdgpu_allocate_static_csa(struct amdgpu_device *adev) > { > @@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) > uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) > { > signed long r; > - uint32_t val; > - struct dma_fence *f; > + uint32_t val, seq; > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > struct amdgpu_ring *ring = &kiq->ring; > > BUG_ON(!ring->funcs->emit_rreg); > > - mutex_lock(&kiq->ring_mutex); > + spin_lock(&kiq->ring_lock); > amdgpu_ring_alloc(ring, 32); > amdgpu_ring_emit_rreg(ring, reg); > - amdgpu_fence_emit(ring, &f); > + amdgpu_fence_emit_polling(ring, &seq); > amdgpu_ring_commit(ring); > - mutex_unlock(&kiq->ring_mutex); > + spin_unlock(&kiq->ring_lock); > > - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); > - dma_fence_put(f); > + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); > if (r < 1) { > - DRM_ERROR("wait for kiq fence error: %ld.\n", r); > + DRM_ERROR("wait for kiq fence error: %ld\n", r); > return ~0; > } > - > val = adev->wb.wb[adev->virt.reg_val_offs]; > > return val; > @@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) > void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) > { > signed long r; > - struct dma_fence *f; > + uint32_t seq; > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > struct amdgpu_ring *ring = &kiq->ring; > > BUG_ON(!ring->funcs->emit_wreg); > > - mutex_lock(&kiq->ring_mutex); > + spin_lock(&kiq->ring_lock); > amdgpu_ring_alloc(ring, 32); > amdgpu_ring_emit_wreg(ring, reg, v); > - amdgpu_fence_emit(ring, &f); > + amdgpu_fence_emit_polling(ring, &seq); > amdgpu_ring_commit(ring); > - mutex_unlock(&kiq->ring_mutex); > + spin_unlock(&kiq->ring_lock); > > - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); > + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); > if (r < 1) > - DRM_ERROR("wait for kiq fence error: %ld.\n", r); > - dma_fence_put(f); > + DRM_ERROR("wait for kiq fence error: %ld\n", r); > } > > /**