[Public] > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Harish > Kasiviswanathan > Sent: Friday, March 14, 2025 8:44 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@xxxxxxx> > Subject: [PATCH v3] drm/amdkfd: Fix bug in config_dequeue_wait_counts > > For certain ASICs where dequeue_wait_count don't need to be initialized, > pm_config_dequeue_wait_counts_v9 return without filling in the packet > information. However, the calling function interprets this as a success > and sends the uninitialized packet to firmware causing hang. > > Fix the above bug by not calling pm_config_dequeue_wait_counts_v9 for > ASICs that don't need the value to be initialized. > > v2: Removed redudant code. > Tidy up code based on review comments > v3: Don't call pm_config_dequeue_wait_counts_v9 for certain ASICs > > Fixes: <98a5af8103f> ("drm/amdkfd: Add pm_config_dequeue_wait_counts API") > > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@xxxxxxx> Nit-picks below. With those addressed and as long as this has been tested on optimized and unoptimized HW: Reviewed-by: Jonathan Kim <jonathan.kim@xxxxxxx> > --- > .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 16 ++++++---- > .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 30 +++++++++++-------- > 2 files changed, 27 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > index 3f574d82b5fc..8a47b7259a10 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > @@ -418,6 +418,10 @@ int pm_config_dequeue_wait_counts(struct > packet_manager *pm, > !pm->pmf->config_dequeue_wait_counts_size) > return 0; > > + if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm- > >dev) < IP_VERSION(9, 4, 1) || > + KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))) > + return 0; > + > size = pm->pmf->config_dequeue_wait_counts_size; > > mutex_lock(&pm->lock); > @@ -436,16 +440,16 @@ int pm_config_dequeue_wait_counts(struct > packet_manager *pm, > > retval = pm->pmf->config_dequeue_wait_counts(pm, buffer, > cmd, value); > - if (!retval) > + if (!retval) { > retval = kq_submit_packet(pm->priv_queue); > + > + /* If default value is modified, cache that in dqm->wait_times > */ > + if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) > + update_dqm_wait_times(pm->dqm); > + } > else > kq_rollback_packet(pm->priv_queue); Put else statement next to brace in line above and put braces after else statement to balance the if statement braces. > } > - > - /* If default value is modified, cache that value in dqm->wait_times */ > - if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) > - update_dqm_wait_times(pm->dqm); > - > out: > mutex_unlock(&pm->lock); > return retval; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > index d440df602393..f059041902bc 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > @@ -310,6 +310,13 @@ static inline void > pm_build_dequeue_wait_counts_packet_info(struct packet_manage > reg_data); > } > > +/* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with > + * register/value for configuring dequeue wait counts > + * > + * @return: -ve for failure and 0 for success and buffer is > + * filled in with packet > + * > + **/ > static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, > uint32_t *buffer, > enum kfd_config_dequeue_wait_counts_cmd cmd, > @@ -321,24 +328,21 @@ static int pm_config_dequeue_wait_counts_v9(struct > packet_manager *pm, > > switch (cmd) { > case KFD_DEQUEUE_WAIT_INIT: { > - uint32_t sch_wave = 0, que_sleep = 0; > - /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default > 0x40. > + uint32_t sch_wave = 0, que_sleep = 1; Do the following here (start of case WAIT_INIT) for safety to explicitly state certain devices are not permitted to do WAIT_INIT since you're unconditionally setting que_sleep to 1: if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) || KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)) return -EPERM; Jon > + > + /* For all gfx9 ASICs > gfx941, > + * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default > 0x40. > * On a 1GHz machine this is roughly 1 microsecond, which is > * about how long it takes to load data out of memory during > * queue connect > * QUE_SLEEP: Wait Count for Dequeue Retry. > + * > + * Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU > */ > - if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) && > - KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) { > - que_sleep = 1; > - > - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU > */ > - if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev- > >gmc.is_app_apu && > - (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, > 3))) > - sch_wave = 1; > - } else { > - return 0; > - } > + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev- > >gmc.is_app_apu && > + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) > + sch_wave = 1; > + > pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, > que_sleep, > ®_offset, ®_data); > > -- > 2.34.1