RE: [PATCH 2/2] drm/amdgpu: Reduce dequeue retry timeout for gfx9 family

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[Public]

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Harish
> Kasiviswanathan
> Sent: Wednesday, February 26, 2025 2:23 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@xxxxxxx>
> Subject: [PATCH 2/2] drm/amdgpu: Reduce dequeue retry timeout for gfx9 family
>
> Dequeue retry timeout controls the interval between checks for unmet
> conditions. On MI series, reduce this from 0x40 to 0x1 (~ 1 uS). The
> cost of additional bandwidth consumed by CP when polling memory
> shouldn't be substantial.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@xxxxxxx>
> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  2 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  4 +--
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 28 ++++++++---------
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h    |  5 +--
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 28 ++++++++---------
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  5 +--
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c    | 31 ++++++++++++++-----
>  .../gpu/drm/amd/include/kgd_kfd_interface.h   |  5 +--
>  10 files changed, 65 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index 8dfdb18197c4..53f5f1885870 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -189,7 +189,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
>       .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
>       .clear_address_watch = kgd_gfx_v9_clear_address_watch,
>       .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> -     .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> +     .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
>       .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
>       .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
>       .hqd_reset = kgd_gfx_v9_hqd_reset,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 9abf29b58ac7..6fd41aece7e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -415,7 +415,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
>       .set_address_watch = kgd_gfx_v9_set_address_watch,
>       .clear_address_watch = kgd_gfx_v9_clear_address_watch,
>       .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> -     .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> +     .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
>       .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
>       .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
>       .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> index e2ae714a700f..95f249896275 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> @@ -530,8 +530,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
>       .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
>       .program_trap_handler_settings =
>                               kgd_gfx_v9_program_trap_handler_settings,
> -     .build_grace_period_packet_info =
> -                             kgd_gfx_v9_build_grace_period_packet_info,
> +     .build_dequeue_wait_counts_packet_info =
> +                             kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
>       .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
>       .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
>       .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 62176d607bef..0b03f2e9a858 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct
> amdgpu_device *adev,
>       *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0,
> mmCP_IQ_WAIT_TIME2));
>  }
>
> -void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
> +void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device
> *adev,
>                                               uint32_t wait_times,
> -                                             uint32_t grace_period,
> +                                             uint32_t sch_wave,
> +                                             uint32_t que_sleep,
>                                               uint32_t *reg_offset,
>                                               uint32_t *reg_data)
>  {
>       *reg_data = wait_times;
>
> -     /*
> -      * The CP cannont handle a 0 grace period input and will result in
> -      * an infinite grace period being set so set to 1 to prevent this.
> -      */
> -     if (grace_period == 0)
> -             grace_period = 1;
> -
> -     *reg_data = REG_SET_FIELD(*reg_data,
> -                     CP_IQ_WAIT_TIME2,
> -                     SCH_WAVE,
> -                     grace_period);
> +     if (sch_wave)
> +             *reg_data = REG_SET_FIELD(*reg_data,
> +                             CP_IQ_WAIT_TIME2,
> +                             SCH_WAVE,
> +                             sch_wave);
> +     if (que_sleep)
> +             *reg_data = REG_SET_FIELD(*reg_data,
> +                             CP_IQ_WAIT_TIME2,
> +                             QUE_SLEEP,
> +                             que_sleep);
>
>       *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
>  }
> @@ -1109,7 +1109,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
>       .set_address_watch = kgd_gfx_v10_set_address_watch,
>       .clear_address_watch = kgd_gfx_v10_clear_address_watch,
>       .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
> -     .build_grace_period_packet_info =
> kgd_gfx_v10_build_grace_period_packet_info,
> +     .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
>       .program_trap_handler_settings = program_trap_handler_settings,
>       .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
>       .hqd_reset = kgd_gfx_v10_hqd_reset
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> index 9efd2dd4fdd7..89ae07288b10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> @@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct
> amdgpu_device *adev,
>  void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
>                               uint32_t *wait_times,
>                               uint32_t inst);
> -void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
> +void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device
> *adev,
>                                              uint32_t wait_times,
> -                                            uint32_t grace_period,
> +                                            uint32_t sch_wave,
> +                                            uint32_t que_sleep,
>                                              uint32_t *reg_offset,
>                                              uint32_t *reg_data);
>  uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> index c718bedda0ca..2c5f22838fe0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> @@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
>       .set_vm_context_page_table_base =
> set_vm_context_page_table_base_v10_3,
>       .program_trap_handler_settings = program_trap_handler_settings_v10_3,
>       .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
> -     .build_grace_period_packet_info =
> kgd_gfx_v10_build_grace_period_packet_info,
> +     .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
>       .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
>       .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
>       .validate_trap_override_request =
> kgd_gfx_v10_validate_trap_override_request,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 441568163e20..d2bbe9973c93 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct
> amdgpu_device *adev,
>                               adev->gfx.cu_info.max_waves_per_simd;
>  }
>
> -void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
> +void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device
> *adev,
>               uint32_t wait_times,
> -             uint32_t grace_period,
> +             uint32_t sch_wave,
> +             uint32_t que_sleep,
>               uint32_t *reg_offset,
>               uint32_t *reg_data)
>  {
>       *reg_data = wait_times;
>
> -     /*
> -      * The CP cannot handle a 0 grace period input and will result in
> -      * an infinite grace period being set so set to 1 to prevent this.
> -      */
> -     if (grace_period == 0)
> -             grace_period = 1;
> -
> -     *reg_data = REG_SET_FIELD(*reg_data,
> -                     CP_IQ_WAIT_TIME2,
> -                     SCH_WAVE,
> -                     grace_period);
> +     if (sch_wave)
> +             *reg_data = REG_SET_FIELD(*reg_data,
> +                             CP_IQ_WAIT_TIME2,
> +                             SCH_WAVE,
> +                             sch_wave);
> +     if (que_sleep)
> +             *reg_data = REG_SET_FIELD(*reg_data,
> +                             CP_IQ_WAIT_TIME2,
> +                             QUE_SLEEP,
> +                             que_sleep);
>
>       *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
>  }
> @@ -1254,7 +1254,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>       .set_address_watch = kgd_gfx_v9_set_address_watch,
>       .clear_address_watch = kgd_gfx_v9_clear_address_watch,
>       .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> -     .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
> +     .build_dequeue_wait_counts_packet_info =
> kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
>       .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
>       .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
>       .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index b6a91a552aa4..54ee8992be5c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct
> amdgpu_device *adev,
>  void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
>                               uint32_t *wait_times,
>                               uint32_t inst);
> -void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
> +void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device
> *adev,
>                                              uint32_t wait_times,
> -                                            uint32_t grace_period,
> +                                            uint32_t sch_wave,
> +                                            uint32_t que_sleep,
>                                              uint32_t *reg_offset,
>                                              uint32_t *reg_data);
>  uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index 9cb21af1d0af..f9c302732773 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -298,13 +298,14 @@ static int pm_map_queues_v9(struct packet_manager
> *pm, uint32_t *buffer,
>  }
>
>  static inline void pm_build_dequeue_wait_counts_packet_info(struct
> packet_manager *pm,
> -                     uint32_t sch_value, uint32_t *reg_offset,
> +                     uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
>                       uint32_t *reg_data)
>  {
> -     pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
> +     pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
>               pm->dqm->dev->adev,
>               pm->dqm->wait_times,
>               sch_value,
> +             que_sleep,
>               reg_offset,
>               reg_data);
>  }
> @@ -320,26 +321,40 @@ static int pm_config_dequeue_wait_counts_v9(struct
> packet_manager *pm,
>
>       switch (cmd) {
>       case KFD_DEQUEUE_WAIT_INIT:
> +             uint32_t sch_wave = 0, que_sleep = 0;
> +             /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default
> 0x40.
> +              * On a 1GHz machine this is roughly 1 microsecond, which is
> +              * about how long it takes to load data out of memory during
> +              * queue connect
> +              * QUE_SLEEP: Wait Count for Dequeue Retry.
> +              */
> +             if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
> +                 KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0))
> +                     que_sleep = 1;
> +
>               /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
>               if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev-
> >gmc.is_app_apu &&
>                  (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
> -                     pm_build_dequeue_wait_counts_packet_info(pm, 1,
> &reg_offset, &reg_data);
> -             else
> -                     return 0;
> +                     sch_wave = 1;
> +

I'd still early return 0 on !(que_sleep || sch_wave) here as sending a default packet wait times packet on WAIT_INIT to the HWS for GC_VERSION < 9.4.1 or GFX10 seems unnecessary.

> +             pm_build_dequeue_wait_counts_packet_info(pm, sch_wave,
> que_sleep,
> +                     &reg_offset, &reg_data);

You can probably just put this in one line if you're not exceeding the 100 char limit.

> +
>               break;
>       case KFD_DEQUEUE_WAIT_RESET:
>               /* function called only to get reg_offset */
> -             pm_build_dequeue_wait_counts_packet_info(pm, 0, &reg_offset,
> &reg_data);
> +             pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset,
> &reg_data);
>               reg_data = pm->dqm->wait_times;

Is the assignment of reg_data necessary here (not sure if it was necessary in patch 1 either)?  I could be wrong, but from the kgd calls, it looks like you'd initialize dqm->wait_times anyways for reg_data and reg_data remains unaltered with your changes that conditionally set the wait fields on non-null value (which is what's currently happening in your proposed WAIT_INIT for que_sleep == 0 & sch_wave == 0 condition).

With nit-picks addressed, this patch is:
Reviewed-by: Jonathan Kim <jonathan.kim@xxxxxxx>

>               break;
>
>       case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
>               /* The CP cannot handle value 0 and it will result in
> -              * an infinite grace period being set so set to 1 to prevent this.
> +              * an infinite grace period being set so set to 1 to prevent this. Also
> +              * avoid debugger API breakage as it sets 0 and expects a low value.
>                */
>               if (!value)
>                       value = 1;
> -             pm_build_dequeue_wait_counts_packet_info(pm, value, &reg_offset,
> &reg_data);
> +             pm_build_dequeue_wait_counts_packet_info(pm, value, 0,
> &reg_offset, &reg_data);
>               break;
>       }
>
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index e3e635a31b8a..4f79e707bc80 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -313,9 +313,10 @@ struct kfd2kgd_calls {
>       void (*get_iq_wait_times)(struct amdgpu_device *adev,
>                       uint32_t *wait_times,
>                       uint32_t inst);
> -     void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
> +     void (*build_dequeue_wait_counts_packet_info)(struct amdgpu_device
> *adev,
>                       uint32_t wait_times,
> -                     uint32_t grace_period,
> +                     uint32_t sch_wave,
> +                     uint32_t que_sleep,
>                       uint32_t *reg_offset,
>                       uint32_t *reg_data);
>       void (*get_cu_occupancy)(struct amdgpu_device *adev,
> --
> 2.34.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux