Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over other queues on the same pipe. Multiple queues on a pipe are timesliced so this gives us full precedence over other queues. Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the wave as follows: 0x2: CS_H 0x1: CS_M 0x0: CS_L The SPI block will then dispatch work according to the policy set by SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than gfx. In order to prevent getting stuck in loops of CUs bouncing between GFX and high priority compute and introducing further latency, we reserve CUs 2+ for high priority compute on-demand. v2: fix srbm_select to ring->queue and use ring->funcs->type v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: switch int to enum amd_sched_priority v5: corresponding changes for srbm_lock v6: change CU reservation to PIPE_PERCENT allocation v7: use kiq instead of MMIO Acked-by: Christian König <christian.koenig at amd.com> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 117 +++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9ffe1d3..1510a78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1100,6 +1100,10 @@ struct amdgpu_gfx { bool in_suspend; /* NGG */ struct amdgpu_ngg ngg; + + /* pipe reservation */ + spinlock_t pipe_reserve_lock; + DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4c8ec52..69df11e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1880,6 +1880,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); + bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); adev->smc_rreg = &amdgpu_invalid_rreg; adev->smc_wreg = &amdgpu_invalid_wreg; @@ -1926,6 +1927,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->mm_stats.lock); spin_lock_init(&adev->gfx.kiq.ring_lock); spin_lock_init(&adev->gfx.kiq.srbm_lock); + spin_lock_init(&adev->gfx.pipe_reserve_lock); INIT_LIST_HEAD(&adev->shadow_list); mutex_init(&adev->shadow_list_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index be40fc9..a043bff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -53,6 +53,10 @@ #define GFX8_NUM_GFX_RINGS 1 #define GFX8_MEC_HPD_SIZE 2048 +#define GFX8_CU_RESERVE_RESOURCES 0x45888 +#define GFX8_CU_NUM 8 +#define GFX8_UNRESERVED_CU_NUM 2 +#define GFX8_CU_RESERVE_PIPE_SHIFT 7 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 @@ -6649,6 +6653,118 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } +static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, + bool acquire) +{ + struct amdgpu_device *adev = ring->adev; + int pipe_num, tmp, reg; + int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; + + pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; + + /* first me only has 2 entries, GFX and HP3D */ + if (ring->me > 0) + pipe_num -= 2; + + /* There is a bug in the GFX pipes that results in a HS + * deadlock if the pipe is restricted to a percentage + * lower than 17 */ + if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE && pipe_percent < 17) + pipe_percent = 17; + + reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; + tmp = RREG32(reg); + tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); + WREG32_KIQ_ASYNC(reg, tmp); +} + +static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + int i, pipe; + bool reserve; + struct amdgpu_ring *iring; + + spin_lock(&adev->gfx.pipe_reserve_lock); + pipe = amdgpu_queue_to_bit(adev, ring->me, ring->pipe, 0); + if (acquire) + set_bit(pipe, adev->gfx.pipe_reserve_bitmap); + else + clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); + + if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { + /* Clear all reservations - everyone reacquires all resources */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) + gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], + true); + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) + gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], + true); + } else { + /* Lower all pipes without a current reservation */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + iring = &adev->gfx.gfx_ring[i]; + pipe = amdgpu_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v8_0_ring_set_pipe_percent(iring, reserve); + } + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + iring = &adev->gfx.compute_ring[i]; + pipe = amdgpu_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v8_0_ring_set_pipe_percent(iring, reserve); + } + } + + spin_unlock(&adev->gfx.pipe_reserve_lock); +} + +static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + spin_lock(&adev->gfx.kiq.srbm_lock); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0, true); + + switch (priority) { + case AMD_SCHED_PRIORITY_NORMAL: + WREG32_KIQ_ASYNC(mmCP_HQD_PIPE_PRIORITY, 0x0); + WREG32_KIQ_ASYNC(mmCP_HQD_QUEUE_PRIORITY, 0x0); + break; + case AMD_SCHED_PRIORITY_HIGH: + WREG32_KIQ_ASYNC(mmCP_HQD_PIPE_PRIORITY, 0x2); + WREG32_KIQ_ASYNC(mmCP_HQD_QUEUE_PRIORITY, 0xf); + break; + default: + WARN(1, "Attempt to set invalid SPI priority:%d for ring:%d\n", + priority, ring->idx); + break; + } + + vi_srbm_select(adev, 0, 0, 0, 0, true); + spin_unlock(&adev->gfx.kiq.srbm_lock); +} +static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) + return; + + gfx_v8_0_hqd_set_priority(adev, ring, priority); + gfx_v8_0_pipe_reserve_resources(adev, ring, priority >= AMD_SCHED_PRIORITY_HIGH); +} + static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) @@ -7075,6 +7191,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .set_priority = gfx_v8_0_ring_set_priority_compute, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { -- 2.9.3