1. Use spin lock instead of mutex in KIQ
2. Directly write to KIQ fence address instead of using fence_emit()
3. Disable the interrupt for KIQ read/write and use CPU polling

Change-Id: Id3693a2878ce1338f55aee3def6e7fc0f6b81996
Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 57 ++++++++++++++++++++++----------
 3 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ab1dad2..a155206 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -955,7 +955,7 @@ struct amdgpu_mec {
 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
-	struct mutex		ring_mutex;
+	spinlock_t		ring_lock;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index e26108a..e5e5541 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -184,7 +184,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	int r = 0;
 
-	mutex_init(&kiq->ring_mutex);
+	spin_lock_init(&kiq->ring_lock);
 
 	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8a081e1..95b4975 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -116,24 +116,33 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
 	signed long r;
 	uint32_t val;
-	struct dma_fence *f;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *ring = &kiq->ring;
+	unsigned long end_jiffies;
+	uint32_t seq;
+	volatile uint32_t *f;
 
 	BUG_ON(!ring->funcs->emit_rreg);
 
-	mutex_lock(&kiq->ring_mutex);
+	spin_lock(&kiq->ring_lock);
 	amdgpu_ring_alloc(ring, 32);
 	amdgpu_ring_emit_rreg(ring, reg);
-	amdgpu_fence_emit(ring, &f);
+	f = ring->fence_drv.cpu_addr;
+	*f = 0;
+	seq = ++ring->fence_drv.sync_seq;
+	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, 0);
 	amdgpu_ring_commit(ring);
-	mutex_unlock(&kiq->ring_mutex);
-
-	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
-	dma_fence_put(f);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return ~0;
+	spin_unlock(&kiq->ring_lock);
+
+	end_jiffies = (MAX_KIQ_REG_WAIT * HZ / 1000) + jiffies;
+	while (true) {
+		if (*f >= seq)
+			break;
+		if (time_after(jiffies, end_jiffies)) {
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+			return ~0;
+		}
+		cpu_relax();
 	}
 
 	val = adev->wb.wb[adev->virt.reg_val_offs];
@@ -144,23 +153,35 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 {
 	signed long r;
-	struct dma_fence *f;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *ring = &kiq->ring;
+	unsigned long end_jiffies;
+	uint32_t seq;
+	volatile uint32_t *f;
 
 	BUG_ON(!ring->funcs->emit_wreg);
 
-	mutex_lock(&kiq->ring_mutex);
+	spin_lock(&kiq->ring_lock);
 	amdgpu_ring_alloc(ring, 32);
 	amdgpu_ring_emit_wreg(ring, reg, v);
-	amdgpu_fence_emit(ring, &f);
+	f = ring->fence_drv.cpu_addr;
+	*f = 0;
+	seq = ++ring->fence_drv.sync_seq;
+	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, 0);
 	amdgpu_ring_commit(ring);
-	mutex_unlock(&kiq->ring_mutex);
+	spin_unlock(&kiq->ring_lock);
+
+	end_jiffies = (MAX_KIQ_REG_WAIT * HZ / 1000) + jiffies;
+	while (true) {
+		if (*f >= seq)
+			break;
+		if (time_after(jiffies, end_jiffies)) {
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+			return;
+		}
+		cpu_relax();
+	}
 
-	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
-	if (r < 1)
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-	dma_fence_put(f);
 }
 
 /**
--
1.9.1
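
For reference, the busy-wait that replaces dma_fence_wait_timeout() in this patch boils down to the pattern sketched below in plain userspace C: clear the fence slot, submit work that will eventually write the sequence number, then spin on the value with a deadline. This is only an illustration of the control flow, not amdgpu code; a writer thread stands in for the KIQ firmware, and fake_fence, gpu_writer, POLL_TIMEOUT_MS and now_ms are made-up names for the example.

/*
 * Minimal sketch of the poll-with-deadline pattern: clear the fence,
 * kick off the "GPU" writer, then spin until the sequence number
 * appears or the deadline passes.  Build with: cc -pthread poll.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define POLL_TIMEOUT_MS 100

static _Atomic uint32_t fake_fence;     /* stands in for ring->fence_drv.cpu_addr */

static void *gpu_writer(void *arg)
{
	uint32_t seq = *(uint32_t *)arg;

	usleep(1000);                   /* pretend the ring takes 1 ms */
	atomic_store(&fake_fence, seq); /* "GPU" signals the fence */
	return NULL;
}

static uint64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

int main(void)
{
	uint32_t seq = 1;
	pthread_t writer;
	uint64_t deadline;

	atomic_store(&fake_fence, 0);   /* clear the fence before submitting */
	pthread_create(&writer, NULL, gpu_writer, &seq);

	deadline = now_ms() + POLL_TIMEOUT_MS;
	while (atomic_load(&fake_fence) < seq) {
		if (now_ms() > deadline) {
			fprintf(stderr, "wait for fence timed out\n");
			pthread_join(writer, NULL);
			return 1;
		}
		/* a real loop would relax/yield here, like cpu_relax() above */
	}

	printf("fence signalled, seq = %u\n", atomic_load(&fake_fence));
	pthread_join(writer, NULL);
	return 0;
}

In the kernel version the deadline arithmetic is done in jiffies and cpu_relax() keeps the spin polite; the sketch only mirrors the clear/submit/poll control flow.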