For certain ASICs where dequeue_wait_count don't need to be initialized, pm_config_dequeue_wait_counts_v9 return without filling in the packet information. However, the calling function interprets this as a success and sends the uninitialized packet to firmware causing hang. Fix the above bug by not calling pm_config_dequeue_wait_counts_v9 for ASICs that don't need the value to be initialized. v2: Removed redudant code. Tidy up code based on review comments v3: Don't call pm_config_dequeue_wait_counts_v9 for certain ASICs Fixes: <98a5af8103f> ("drm/amdkfd: Add pm_config_dequeue_wait_counts API") Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@xxxxxxx> --- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 16 ++++++---- .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 30 +++++++++++-------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 3f574d82b5fc..8a47b7259a10 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -418,6 +418,10 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm, !pm->pmf->config_dequeue_wait_counts_size) return 0; + if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) || + KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))) + return 0; + size = pm->pmf->config_dequeue_wait_counts_size; mutex_lock(&pm->lock); @@ -436,16 +440,16 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm, retval = pm->pmf->config_dequeue_wait_counts(pm, buffer, cmd, value); - if (!retval) + if (!retval) { retval = kq_submit_packet(pm->priv_queue); + + /* If default value is modified, cache that in dqm->wait_times */ + if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) + update_dqm_wait_times(pm->dqm); + } else kq_rollback_packet(pm->priv_queue); } - - /* If default value is modified, cache that value in dqm->wait_times */ - if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) - update_dqm_wait_times(pm->dqm); - out: mutex_unlock(&pm->lock); return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index d440df602393..f059041902bc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -310,6 +310,13 @@ static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manage reg_data); } +/* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with + * register/value for configuring dequeue wait counts + * + * @return: -ve for failure and 0 for success and buffer is + * filled in with packet + * + **/ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, uint32_t *buffer, enum kfd_config_dequeue_wait_counts_cmd cmd, @@ -321,24 +328,21 @@ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, switch (cmd) { case KFD_DEQUEUE_WAIT_INIT: { - uint32_t sch_wave = 0, que_sleep = 0; - /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. + uint32_t sch_wave = 0, que_sleep = 1; + + /* For all gfx9 ASICs > gfx941, + * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. * On a 1GHz machine this is roughly 1 microsecond, which is * about how long it takes to load data out of memory during * queue connect * QUE_SLEEP: Wait Count for Dequeue Retry. + * + * Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ - if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) && - KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) { - que_sleep = 1; - - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ - if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && - (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) - sch_wave = 1; - } else { - return 0; - } + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) + sch_wave = 1; + pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep, ®_offset, ®_data); -- 2.34.1