On 13/10/2017, 5:16 PM, "Christian König" <ckoenig.leichtzumerken at gmail.com> wrote: >Am 13.10.2017 um 10:26 schrieb Pixel Ding: >> From: pding <Pixel.Ding at amd.com> >> >> Register accessing is performed when IRQ is disabled. Never sleep in >> this function. >> >> Known issue: dead sleep in many use cases of index/data registers. >> >> Signed-off-by: pding <Pixel.Ding at amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++--- >> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 1 + >> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + >> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 55 ++++++++++++++++++------------ >> 6 files changed, 40 insertions(+), 28 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index ca212ef..f9de756 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -886,6 +886,7 @@ struct amdgpu_kiq { >> u64 eop_gpu_addr; >> struct amdgpu_bo *eop_obj; >> struct mutex ring_mutex; > >You can remove the ring_mutex if you don't use it any more. [Pixel] KFD still use the mutex, I didnâ??t change it also to spin lock, should I? > >> + spinlock_t ring_lock; >> struct amdgpu_ring ring; >> struct amdgpu_irq_src irq; >> }; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> index ab8f0d6..1197274 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, >> { >> uint32_t ret; >> >> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { >> - BUG_ON(in_interrupt()); >> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) >> return amdgpu_virt_kiq_rreg(adev, reg); >> - } >> >> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) >> ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); >> @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, >> adev->last_mm_index = v; >> } >> >> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { >> - BUG_ON(in_interrupt()); >> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) >> return amdgpu_virt_kiq_wreg(adev, reg, v); >> - } >> >> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) >> writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >> index 2044758..c6c7bf3 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >> @@ -109,7 +109,7 @@ static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq) >> * Reads a fence value from memory (all asics). >> * Returns the value of the fence read from memory. >> */ >> -static u32 amdgpu_fence_read(struct amdgpu_ring *ring) >> +u32 amdgpu_fence_read(struct amdgpu_ring *ring) >> { >> struct amdgpu_fence_driver *drv = &ring->fence_drv; >> u32 seq = 0; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> index 4f6c68f..46fa88c 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> @@ -186,6 +186,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, >> int r = 0; >> >> mutex_init(&kiq->ring_mutex); >> + spin_lock_init(&kiq->ring_lock); >> >> r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); >> if (r) >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> index af8e544..a4fa923 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> @@ -89,6 +89,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, >> void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); >> void amdgpu_fence_driver_resume(struct amdgpu_device *adev); >> int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); >> +u32 amdgpu_fence_read(struct amdgpu_ring *ring); >> void amdgpu_fence_process(struct amdgpu_ring *ring); >> int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); >> unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> index ab05121..9757df1 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> @@ -22,7 +22,7 @@ >> */ >> >> #include "amdgpu.h" >> -#define MAX_KIQ_REG_WAIT 100000 >> +#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ >You want to busy wait 100 seconds for an result? > >Forget it that is way to long you will certainly run into problems with >the NMI much earlier. >[Pixel] thanks, would change it to 10s. > >> >> int amdgpu_allocate_static_csa(struct amdgpu_device *adev) >> { >> @@ -113,28 +113,32 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) >> >> uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) >> { >> - signed long r; >> - uint32_t val; >> - struct dma_fence *f; >> + signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT); >> + unsigned long flags; >> + uint32_t val, seq, wait_seq; >> struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> struct amdgpu_ring *ring = &kiq->ring; >> >> BUG_ON(!ring->funcs->emit_rreg); >> >> - mutex_lock(&kiq->ring_mutex); >> + spin_lock_irqsave(&kiq->ring_lock, flags); >> amdgpu_ring_alloc(ring, 32); >> amdgpu_ring_emit_rreg(ring, reg); >> - amdgpu_fence_emit(ring, &f); >> + wait_seq = ++ring->fence_drv.sync_seq; >> + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, >> + wait_seq, AMDGPU_FENCE_FLAG_INT); > >Do you still need to send an interrupt when you busy wait? I don't think so. [Pixel]I want to let the IRQ handler to update the last_seq in case of that KIQ is used by other functionality with fence. Is it necessary? > >> amdgpu_ring_commit(ring); >> - mutex_unlock(&kiq->ring_mutex); >> - >> - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); >> - dma_fence_put(f); >> - if (r < 1) { >> - DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> + spin_unlock_irqrestore(&kiq->ring_lock, flags); >> + do { >> + seq = amdgpu_fence_read(ring); >> + udelay(5); >> + timeout -= 5; >> + } while (seq < wait_seq && timeout > 0); > >You need to correctly handle sequence wrap around here. [Pixel] yes thanks, but I didnâ??t see similar case handling in normal fence handling codepath, Is there? So I assumed 32bit fence seq could not overflow for years. > >> + >> + if (timeout < 1) { >> + DRM_ERROR("wait for kiq fence error: %ld\n", timeout); >> return ~0; >> } >> - >> val = adev->wb.wb[adev->virt.reg_val_offs]; >> >> return val; >> @@ -142,24 +146,33 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) >> >> void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) >> { >> - signed long r; >> + signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT); >> + unsigned long flags; >> + uint32_t seq, wait_seq; >> struct dma_fence *f; >> struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> struct amdgpu_ring *ring = &kiq->ring; >> >> BUG_ON(!ring->funcs->emit_wreg); >> >> - mutex_lock(&kiq->ring_mutex); >> + spin_lock_irqsave(&kiq->ring_lock, flags); >> amdgpu_ring_alloc(ring, 32); >> amdgpu_ring_emit_wreg(ring, reg, v); >> - amdgpu_fence_emit(ring, &f); >> + /* amdgpu_fence_emit(ring, &f); */ >> + wait_seq = ++ring->fence_drv.sync_seq; >> + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, >> + wait_seq, AMDGPU_FENCE_FLAG_INT); >> amdgpu_ring_commit(ring); >> - mutex_unlock(&kiq->ring_mutex); >> + spin_unlock_irqrestore(&kiq->ring_lock, flags); >> + >> + do { >> + seq = amdgpu_fence_read(ring); >> + udelay(5); >> + timeout -= 5; >> + } while (seq < wait_seq && timeout > 0); > >Same here. It would probably be better to have a function for this in >amdgpu_fence.c [Pixel] get it, will add a wrapper. > >Regards, >Christian. > >> >> - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); >> - if (r < 1) >> - DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> - dma_fence_put(f); >> + if (timeout < 1) >> + DRM_ERROR("wait for kiq fence error: %ld\n", timeout); >> } >> >> /** > > >â?? >Sincerely Yours, >Pixel > > > >