Currently, we unmap HIQ by directly writing to HQD registers. This doesn't work for GFX9.4.3. Instead, use KIQ to unmap HIQ, similar to how we use KIQ to map HIQ. Using KIQ to unmap HIQ works for all GFX series post GFXv9. Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> --- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 1 + .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 47 ++++++++++++++++++ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 3 ++ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 1 + .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c | 47 ++++++++++++++++++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 48 +++++++++++++++++++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 ++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 8 ++++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 4 ++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 7 ++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 3 ++ 13 files changed, 170 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 5b4b7f8b92a5..b82435e17ed0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -372,6 +372,7 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump, .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied, + .hiq_hqd_destroy = kgd_gfx_v9_hiq_hqd_destroy, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy, .wave_control_execute = kgd_gfx_v9_wave_control_execute, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 8ad7a7779e14..a919fb8e09a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -510,6 +510,52 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) return false; } +int kgd_gfx_v10_hiq_hqd_destroy(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + struct v10_compute_mqd *m = get_mqd(mqd); + uint32_t mec, pipe; + uint32_t doorbell_off; + int r; + + doorbell_off = m->cp_hqd_pq_doorbell_control >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + + acquire_queue(adev, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + spin_lock(&adev->gfx.kiq[0].ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 6); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(0) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(doorbell_off)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq[0].ring_lock); + release_queue(adev); + + return r; +} + static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, @@ -1034,6 +1080,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .hqd_sdma_dump = kgd_hqd_sdma_dump, .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hiq_hqd_destroy = kgd_gfx_v10_hiq_hqd_destroy, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, .wave_control_execute = kgd_wave_control_execute, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index e6b70196071a..00b4514ebdd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -53,3 +53,6 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); +int kgd_gfx_v10_hiq_hqd_destroy(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 8c8437a4383f..98b3a8cdc2c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -666,6 +666,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .hqd_sdma_dump = hqd_sdma_dump_v10_3, .hqd_is_occupied = hqd_is_occupied_v10_3, .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3, + .hiq_hqd_destroy = kgd_gfx_v10_hiq_hqd_destroy, .hqd_destroy = hqd_destroy_v10_3, .hqd_sdma_destroy = hqd_sdma_destroy_v10_3, .wave_control_execute = wave_control_execute_v10_3, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index 91c3574ebed3..d0ef32ec76df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -485,6 +485,52 @@ static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd) return false; } +int hiq_hqd_destroy_v11(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + struct v11_compute_mqd *m = get_mqd(mqd); + uint32_t mec, pipe; + uint32_t doorbell_off; + int r; + + doorbell_off = m->cp_hqd_pq_doorbell_control >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + + acquire_queue(adev, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + spin_lock(&adev->gfx.kiq[0].ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 6); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(0) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(doorbell_off)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq[0].ring_lock); + release_queue(adev); + + return r; +} + static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, @@ -796,6 +842,7 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .hqd_sdma_dump = hqd_sdma_dump_v11, .hqd_is_occupied = hqd_is_occupied_v11, .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11, + .hiq_hqd_destroy = hiq_hqd_destroy_v11, .hqd_destroy = hqd_destroy_v11, .hqd_sdma_destroy = hqd_sdma_destroy_v11, .wave_control_execute = wave_control_execute_v11, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 51d93fb13ea3..c1708de38b33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -523,6 +523,53 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) return false; } +int kgd_gfx_v9_hiq_hqd_destroy(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[inst].ring; + struct v9_mqd *m; + uint32_t mec, pipe; + uint32_t doorbell_off; + int r; + + m = get_mqd(mqd); + doorbell_off = m->cp_hqd_pq_doorbell_control >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + spin_lock(&adev->gfx.kiq[inst].ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 6); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(0) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(doorbell_off)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq[inst].ring_lock); + kgd_gfx_v9_release_queue(adev, inst); + + return r; +} + int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, @@ -1154,6 +1201,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .hqd_sdma_dump = kgd_hqd_sdma_dump, .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hiq_hqd_destroy = kgd_gfx_v9_hiq_hqd_destroy, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, .wave_control_execute = kgd_gfx_v9_wave_control_execute, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 5f54bff0db49..37b76c66f0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -45,6 +45,9 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id, uint32_t inst); +int kgd_gfx_v9_hiq_hqd_destroy(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst); int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd, uint32_t inst); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 863cf060af48..83101b3d341a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -193,6 +193,14 @@ int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, queue_id, p->doorbell_off, 0); } +int kfd_destroy_hiq_hqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, unsigned int timeout, + uint32_t pipe_id, uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hiq_hqd_destroy(mm->dev->adev, mqd, + pipe_id, queue_id, 0); +} + int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 23158db7da03..8ad8c8087da8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -173,4 +173,8 @@ void kfd_get_hiq_xcc_mqd(struct kfd_node *dev, uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev); uint64_t kfd_mqd_stride(struct mqd_manager *mm, struct queue_properties *q); + +int kfd_destroy_hiq_hqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, unsigned int timeout, + uint32_t pipe_id, uint32_t queue_id); #endif /* KFD_MQD_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 94c0fc2e57b7..40c0ebb5e7ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -460,7 +460,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->free_mqd = free_mqd_hiq_sdma; mqd->load_mqd = kfd_hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; - mqd->destroy_mqd = kfd_destroy_mqd_cp; + mqd->destroy_mqd = kfd_destroy_hiq_hqd; mqd->is_occupied = kfd_is_occupied_cp; mqd->mqd_size = sizeof(struct v10_compute_mqd); mqd->mqd_stride = kfd_mqd_stride; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 31fec5e70d13..79f245efde5a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -449,7 +449,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->free_mqd = free_mqd_hiq_sdma; mqd->load_mqd = kfd_hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; - mqd->destroy_mqd = kfd_destroy_mqd_cp; + mqd->destroy_mqd = kfd_destroy_hiq_hqd; mqd->is_occupied = kfd_is_occupied_cp; mqd->mqd_size = sizeof(struct v11_compute_mqd); #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 601bb9f68048..7893c53c9372 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -553,9 +553,8 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, for_each_inst(xcc_id, xcc_mask) { xcc_mqd = mqd + hiq_mqd_size * inst; - err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd, - type, timeout, pipe_id, - queue_id, xcc_id); + err = mm->dev->kfd2kgd->hiq_hqd_destroy(mm->dev->adev, xcc_mqd, + pipe_id, queue_id, xcc_id); if (err) { pr_debug("Destroy MQD failed for xcc: %d\n", inst); break; @@ -846,7 +845,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, } else { mqd->init_mqd = init_mqd_hiq; mqd->load_mqd = kfd_hiq_load_mqd_kiq; - mqd->destroy_mqd = kfd_destroy_mqd_cp; + mqd->destroy_mqd = kfd_destroy_hiq_hqd; } break; case KFD_MQD_TYPE_DIQ: diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index d0df3381539f..0f242aad22c7 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -268,6 +268,9 @@ struct kfd2kgd_calls { unsigned int timeout, uint32_t pipe_id, uint32_t queue_id, uint32_t inst); + int (*hiq_hqd_destroy)(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, uint32_t inst); + bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd); int (*hqd_sdma_destroy)(struct amdgpu_device *adev, void *mqd, -- 2.35.1