Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced,
so this gives us full precedence over the other queues on that pipe.

Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
        0x2: CS_H
        0x1: CS_M
        0x0: CS_L

The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy, CS_H is higher priority than
gfx.

To prevent getting stuck in loops of resources bouncing between GFX and
high-priority compute, which would introduce further latency, we
statically reserve a portion of the pipe.

v2: fix srbm_select to ring->queue and use ring->funcs->type
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
v4: switch int to enum amd_sched_priority
v5: corresponding changes for srbm_lock
v6: change CU reservation to PIPE_PERCENT allocation
v7: use kiq instead of MMIO
v8: back to MMIO, and make the implementation sleep safe
v9: corresponding changes for splitting HIGH into _HW/_SW

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 105 +++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3a0561c..04ea1b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1097,30 +1097,34 @@ struct amdgpu_gfx {
 	/* gfx status */
 	uint32_t			gfx_current_status;
 	/* ce ram size*/
 	unsigned			ce_ram_size;
 	struct amdgpu_cu_info		cu_info;
 	const struct amdgpu_gfx_funcs	*funcs;
 
 	/* reset mask */
 	uint32_t			grbm_soft_reset;
 	uint32_t			srbm_soft_reset;
 	bool				in_reset;
 	/* s3/s4 mask */
 	bool				in_suspend;
 	/* NGG */
 	struct amdgpu_ngg		ngg;
+
+	/* pipe reservation */
+	struct mutex			pipe_reserve_mutex;
+	DECLARE_BITMAP			(pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
 		    struct dma_fence *f);
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		       struct amdgpu_ib *ibs, struct amdgpu_job *job,
 		       struct dma_fence **f);
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
 
 /*
  * CS.
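An aside on the two fields added above: they support a simple pattern
(take the mutex, flip one bit per pipe in the bitmap, then rescale
every pipe based on whether any bits remain set), which the gfx_v8_0.c
hunk further down implements. As a condensed, standalone illustration
of that pattern only, here is a userspace sketch; everything in it
except the pipe_reserve_mutex/pipe_reserve_bitmap names (MAX_PIPES,
the printf standing in for the register write) is illustrative and not
part of this patch:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_PIPES 8

static pthread_mutex_t pipe_reserve_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned long pipe_reserve_bitmap;	/* bit n set == pipe n reserved */

/* stand-in for the SPI_WCL_PIPE_PERCENT_GFX register write */
static void set_pipe_percent(int pipe, bool full_rate)
{
	printf("pipe %d -> %s\n", pipe, full_rate ? "full rate" : "throttled");
}

static void pipe_reserve_resources(int pipe, bool acquire)
{
	pthread_mutex_lock(&pipe_reserve_mutex);

	if (acquire)
		pipe_reserve_bitmap |= 1UL << pipe;
	else
		pipe_reserve_bitmap &= ~(1UL << pipe);

	/* no reservations left: everyone reacquires full rate;
	 * otherwise only pipes holding a reservation keep it */
	for (int i = 0; i < MAX_PIPES; ++i)
		set_pipe_percent(i, !pipe_reserve_bitmap ||
				    (pipe_reserve_bitmap & (1UL << i)));

	pthread_mutex_unlock(&pipe_reserve_mutex);
}

int main(void)
{
	pipe_reserve_resources(2, true);	/* pipe 2 acquires priority */
	pipe_reserve_resources(2, false);	/* and releases it again */
	return 0;
}

The design point is that throttling is driven purely by the bitmap
state under the mutex, so overlapping acquire/release from several
queues converges on a consistent set of pipe percentages.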
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0296c9e..424ac3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2012,58 +2012,60 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->dev = &pdev->dev;
 	adev->ddev = ddev;
 	adev->pdev = pdev;
 	adev->flags = flags;
 	adev->asic_type = flags & AMD_ASIC_MASK;
 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
 	adev->mc.gtt_size = 512 * 1024 * 1024;
 	adev->accel_working = false;
 	adev->num_rings = 0;
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
 	adev->vm_manager.vm_pte_num_rings = 0;
 	adev->gart.gart_funcs = NULL;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
+	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 
 	adev->smc_rreg = &amdgpu_invalid_rreg;
 	adev->smc_wreg = &amdgpu_invalid_wreg;
 	adev->pcie_rreg = &amdgpu_invalid_rreg;
 	adev->pcie_wreg = &amdgpu_invalid_wreg;
 	adev->pciep_rreg = &amdgpu_invalid_rreg;
 	adev->pciep_wreg = &amdgpu_invalid_wreg;
 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
 	adev->didt_rreg = &amdgpu_invalid_rreg;
 	adev->didt_wreg = &amdgpu_invalid_wreg;
 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
 
 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
 
 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
 	atomic_set(&adev->irq.ih.lock, 0);
 	mutex_init(&adev->firmware.mutex);
 	mutex_init(&adev->pm.mutex);
 	mutex_init(&adev->gfx.gpu_clock_mutex);
 	mutex_init(&adev->srbm_mutex);
+	mutex_init(&adev->gfx.pipe_reserve_mutex);
 	mutex_init(&adev->grbm_idx_mutex);
 	mutex_init(&adev->mn_lock);
 	hash_init(adev->mn_hash);
 
 	amdgpu_check_arguments(adev);
 
 	/* Registers mapping */
 	/* TODO: block userspace mapping of io register */
 	spin_lock_init(&adev->mmio_idx_lock);
 	spin_lock_init(&adev->smc_idx_lock);
 	spin_lock_init(&adev->pcie_idx_lock);
 	spin_lock_init(&adev->uvd_ctx_idx_lock);
 	spin_lock_init(&adev->didt_idx_lock);
 	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 6e541af..db00816 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6449,30 +6449,134 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
 {
 	return ring->adev->wb.wb[ring->wptr_offs];
 }
 
 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
 	/* XXX check if swapping is necessary on BE */
 	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 }
 
+static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
+					   bool acquire)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int pipe_num, tmp, reg;
+	int pipe_percent = acquire ?
+			SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
+
+	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
+
+	/* first me only has 2 entries, GFX and HP3D */
+	if (ring->me > 0)
+		pipe_num -= 2;
+
+	/* There is a bug in the GFX pipes that results in a HS
+	 * deadlock if the pipe is restricted to a percentage
+	 * lower than 17 */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE && pipe_percent < 17)
+		pipe_percent = 17;
+
+	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
+	tmp = RREG32(reg);
+	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
+	WREG32(reg, tmp);
+}
+
+static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
+					    struct amdgpu_ring *ring,
+					    bool acquire)
+{
+	int i, pipe;
+	bool reserve;
+	struct amdgpu_ring *iring;
+
+	mutex_lock(&adev->gfx.pipe_reserve_mutex);
+	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+	if (acquire)
+		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+	else
+		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+
+	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap,
+			   AMDGPU_MAX_COMPUTE_QUEUES)) {
+		/* Clear all reservations - everyone reacquires all resources */
+		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
+			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
+						       true);
+
+		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
+						       true);
+	} else {
+		/* Lower all pipes without a current reservation */
+		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+			iring = &adev->gfx.gfx_ring[i];
+			pipe = amdgpu_gfx_queue_to_bit(adev,
+						       iring->me,
+						       iring->pipe,
+						       0);
+			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
+		}
+
+		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+			iring = &adev->gfx.compute_ring[i];
+			pipe = amdgpu_gfx_queue_to_bit(adev,
+						       iring->me,
+						       iring->pipe,
+						       0);
+			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
+		}
+	}
+
+	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
+}
+
+static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
+				      struct amdgpu_ring *ring,
+				      bool acquire)
+{
+	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
+	uint32_t queue_priority = acquire ? 0xf : 0x0;
+
+	mutex_lock(&adev->srbm_mutex);
+	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
+	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
+
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+					       enum amd_sched_priority priority)
+{
+	struct amdgpu_device *adev = ring->adev;
+	bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;
+
+	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+		return;
+
+	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
+	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
+}
+
 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
 					     u64 addr, u64 seq,
 					     unsigned flags)
 {
 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
 	/* RELEASE_MEM - flush caches, send int */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 				 EOP_TC_ACTION_EN |
 				 EOP_TC_WB_ACTION_EN |
 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
 				 EVENT_INDEX(5)));
 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ?
 				 2 : 0));
@@ -6869,30 +6973,31 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		17 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.set_priority = gfx_v8_0_ring_set_priority_compute,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 	.type = AMDGPU_RING_TYPE_KIQ,
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = false,
 	.get_rptr = gfx_v8_0_ring_get_rptr,
 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v8_0_ring_emit_gds_switch */
 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
-- 
2.9.3
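For reference, the .set_priority entry wired up above is reached
through the ring functions table, and the compute-ring implementation
is the only one this patch provides. A minimal sketch of a guarded
call site follows; the wrapper name is hypothetical, not something
this patch adds:

/* Hypothetical helper, for illustration only: rings whose funcs
 * table did not wire up .set_priority (gfx, KIQ, ...) are skipped. */
static void amdgpu_ring_set_priority_sketch(struct amdgpu_ring *ring,
					    enum amd_sched_priority priority)
{
	if (ring->funcs->set_priority)
		ring->funcs->set_priority(ring, priority);
}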