From: Le Ma <le.ma@xxxxxxx> Ring aggregated doorbell to make unmapped queue scheduled in mes firmware. Signed-off-by: Le Ma <le.ma@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 3 + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 82 +++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 79 +++++++++++++++++------- 4 files changed, 137 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index e664f714e4ba..9a4c4c905eab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -725,6 +725,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, queue->queue_type = qprops->queue_type; queue->paging = qprops->paging; queue->gang = gang; + queue->ring->mqd_ptr = queue->mqd_cpu_ptr; list_add_tail(&queue->list, &gang->queue_list); amdgpu_mes_unlock(&adev->mes); @@ -1081,6 +1082,12 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, kfree(ring); } +uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, + enum amdgpu_mes_priority_level prio) +{ + return adev->mes.aggregated_doorbells[prio]; +} + int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, struct amdgpu_mes_ctx_data *ctx_data) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 9b011a40906b..f4691b6c3939 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -346,6 +346,9 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, void amdgpu_mes_remove_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring); +uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, + enum amdgpu_mes_priority_level prio); + int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, struct amdgpu_mes_ctx_data *ctx_data); void amdgpu_mes_ctx_free_meta_data(struct 
amdgpu_mes_ctx_data *ctx_data); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index abf2bf7f1a79..5820c3f0e215 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -8525,14 +8525,45 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; + uint64_t wptr_tmp; - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always being used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, + 
upper_32_bits(ring->wptr)); + } } } @@ -8557,13 +8588,42 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; + uint64_t wptr_tmp; - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - BUG(); /* only DOORBELL method supported on gfx10 now */ + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx10 now */ + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 1f9021f896a1..a019ac92edb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -389,34 +389,67 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) static void 
sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr << 2) == 0x%08x " - "upper_32_bits(ring->wptr << 2) == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL); + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + *wptr_saved = ring->wptr << 2; + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, + ring->wptr << 2); + } } else { - DRM_DEBUG("Not using doorbell -- " - "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, 
mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } } -- 2.35.1