Add WREG32_KIQ_ASYNC() to allow a caller to perform a register write without waiting for the result to be committed. This allows us to queue register writes from a context that cannot sleep. It may also be useful for long sequences of register writes performed through the KIQ, where only the last write actually needs to be synchronized. Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ++++++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ca040e6..50bfead 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1669,7 +1669,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags); u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); @@ -1683,11 +1684,13 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define AMDGPU_REGS_IDX (1<<0) #define AMDGPU_REGS_NO_KIQ (1<<1) #define AMDGPU_REGS_FORCE_KIQ (1<<2) +#define AMDGPU_REGS_KIQ_ASYNC (1<<3) #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) #define RREG32_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_FORCE_KIQ) #define WREG32_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_FORCE_KIQ) +#define WREG32_KIQ_ASYNC(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 
AMDGPU_REGS_FORCE_KIQ | AMDGPU_REGS_KIQ_ASYNC) #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6584d7e..3469b9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -123,10 +123,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); if ((!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) - || acc_flags & AMDGPU_REGS_FORCE_KIQ) { - BUG_ON(in_interrupt()); - return amdgpu_kiq_wreg(adev, reg, v); - } + || acc_flags & AMDGPU_REGS_FORCE_KIQ) + return amdgpu_kiq_wreg(adev, reg, v, acc_flags); if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -188,10 +186,11 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) return val; } -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags) { signed long r; - struct dma_fence *f; + struct dma_fence *f = NULL; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; @@ -200,17 +199,20 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_lock(&kiq->ring_lock); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit(ring, &f); + if (!(acc_flags & AMDGPU_REGS_KIQ_ASYNC)) + amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); spin_unlock(&kiq->ring_lock); - r = dma_fence_wait(f, false); - if (r) - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - dma_fence_put(f); + if (f) { + BUG_ON(in_interrupt()); + r = dma_fence_wait(f, false); + if (r) + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + dma_fence_put(f); + } } - /** * 
amdgpu_mm_rdoorbell - read a doorbell dword * -- 2.9.3