RE: [PATCH 1/2] drm/amdkfd: Add pm_config_dequeue_wait_counts API

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[Public]

Overall lgtm.
A comment and nitpick below.

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Harish
> Kasiviswanathan
> Sent: Wednesday, February 26, 2025 2:23 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@xxxxxxx>
> Subject: [PATCH 1/2] drm/amdkfd: Add pm_config_dequeue_wait_counts API
>
> Update pm_update_grace_period() to more cleaner
> pm_config_dequeue_wait_counts(). Previously, grace_period variable was
> overloaded as a variable and a macro, making it inflexible to configure
> additional dequeue wait times.
>
> pm_config_dequeue_wait_counts() now takes in a cmd / variable. This
> allows flexibility to update different dequeue wait times.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@xxxxxxx>
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 +++++++--------
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h | 11 +++-
>  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 26 ++++++++-
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c    | 56 ++++++++++++++-----
>  .../drm/amd/amdkfd/kfd_packet_manager_vi.c    |  4 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         | 28 ++++++++--
>  6 files changed, 120 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 94b1ac8a4735..cc7465f9562a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -42,6 +42,8 @@
>  /* Size of the per-pipe EOP queue */
>  #define CIK_HPD_EOP_BYTES_LOG2 11
>  #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
> +/* See unmap_queues_cpsch() */
> +#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
>
>  static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
>                                 u32 pasid, unsigned int vmid);
> @@ -1745,10 +1747,7 @@ static int initialize_cpsch(struct device_queue_manager
> *dqm)
>
>       init_sdma_bitmaps(dqm);
>
> -     if (dqm->dev->kfd2kgd->get_iq_wait_times)
> -             dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> -                                     &dqm->wait_times,
> -                                     ffs(dqm->dev->xcc_mask) - 1);
> +     update_dqm_wait_times(dqm);
>       return 0;
>  }
>
> @@ -1844,25 +1843,11 @@ static int start_cpsch(struct device_queue_manager
> *dqm)
>       /* clear hang status when driver try to start the hw scheduler */
>       dqm->sched_running = true;
>
> -     if (!dqm->dev->kfd->shared_resources.enable_mes)
> +     if (!dqm->dev->kfd->shared_resources.enable_mes) {
> +             if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
> +                             KFD_DEQUEUE_WAIT_INIT, 0 /* unused */))
> +                     dev_err(dev, "Setting optimized dequeue wait failed. Using
> default values\n");
>               execute_queues_cpsch(dqm,
> KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
> USE_DEFAULT_GRACE_PERIOD);
> -
> -     /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
> -     if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
> -         (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
> -             uint32_t reg_offset = 0;
> -             uint32_t grace_period = 1;
> -
> -             retval = pm_update_grace_period(&dqm->packet_mgr,
> -                                             grace_period);
> -             if (retval)
> -                     dev_err(dev, "Setting grace timeout failed\n");
> -             else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
> -                     /* Update dqm->wait_times maintained in software */
> -                     dqm->dev->kfd2kgd->build_grace_period_packet_info(
> -                                     dqm->dev->adev, dqm->wait_times,
> -                                     grace_period, &reg_offset,
> -                                     &dqm->wait_times);
>       }
>
>       /* setup per-queue reset detection buffer  */
> @@ -2261,7 +2246,14 @@ static int reset_queues_on_hws_hang(struct
> device_queue_manager *dqm)
>       return r;
>  }
>
> -/* dqm->lock mutex has to be locked before calling this function */
> +/* dqm->lock mutex has to be locked before calling this function
> + *
> + * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time
> + *   for context switch latency. Lower values are used by debugger
> + *   since context switching are triggered at high frequency.
> + *   This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE
> + *
> + */
>  static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>                               enum kfd_unmap_queues_filter filter,
>                               uint32_t filter_param,
> @@ -2280,7 +2272,8 @@ static int unmap_queues_cpsch(struct
> device_queue_manager *dqm,
>               return -EIO;
>
>       if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
> -             retval = pm_update_grace_period(&dqm->packet_mgr,
> grace_period);
> +             retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr,
> +                             KFD_DEQUEUE_WAIT_SET_SCH_WAVE,
> grace_period);
>               if (retval)
>                       goto out;
>       }
> @@ -2324,8 +2317,8 @@ static int unmap_queues_cpsch(struct
> device_queue_manager *dqm,
>
>       /* We need to reset the grace period value for this device */
>       if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
> -             if (pm_update_grace_period(&dqm->packet_mgr,
> -                                     USE_DEFAULT_GRACE_PERIOD))
> +             if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
> +                             KFD_DEQUEUE_WAIT_RESET, 0 /* unused */))
>                       dev_err(dev, "Failed to reset grace period\n");
>       }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 09ab36f8e8c6..917717cfe9c5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -37,7 +37,6 @@
>
>  #define KFD_MES_PROCESS_QUANTUM              100000
>  #define KFD_MES_GANG_QUANTUM         10000
> -#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
>
>  struct device_process_node {
>       struct qcm_process_device *qpd;
> @@ -359,4 +358,14 @@ static inline int read_sdma_queue_counter(uint64_t __user
> *q_rptr, uint64_t *val
>       /* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
>       return get_user(*val, q_rptr + 1);
>  }
> +
> +static inline void update_dqm_wait_times(struct device_queue_manager *dqm)
> +{
> +     if (dqm->dev->kfd2kgd->get_iq_wait_times)
> +             dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> +                                     &dqm->wait_times,
> +                                     ffs(dqm->dev->xcc_mask) - 1);
> +}
> +
> +
>  #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 4984b41cd372..dd19ae40f0ba 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -396,14 +396,29 @@ int pm_send_query_status(struct packet_manager *pm,
> uint64_t fence_address,
>       return retval;
>  }
>
> -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
> +/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
> + *  by writing to CP_IQ_WAIT_TIME2 registers.
> + *
> + *  @cmd: See emum kfd_config_dequeue_wait_counts_cmd definition
> + *  @value: Depends on the cmd. This parameter is unused for
> + *    KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
> + *    KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set
> + *
> + */
> +int pm_config_dequeue_wait_counts(struct packet_manager *pm,
> +             enum kfd_config_dequeue_wait_counts_cmd cmd,
> +             uint32_t value)
>  {
>       struct kfd_node *node = pm->dqm->dev;
>       struct device *dev = node->adev->dev;
>       int retval = 0;
>       uint32_t *buffer, size;
>
> -     size = pm->pmf->set_grace_period_size;
> +     if (!pm->pmf->config_dequeue_wait_counts ||
> +         !pm->pmf->config_dequeue_wait_counts_size)
> +             return 0;
> +
> +     size = pm->pmf->config_dequeue_wait_counts_size;
>
>       mutex_lock(&pm->lock);
>
> @@ -419,13 +434,18 @@ int pm_update_grace_period(struct packet_manager
> *pm, uint32_t grace_period)
>                       goto out;
>               }
>
> -             retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
> +             retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
> +                                                          cmd, value);
>               if (!retval)
>                       retval = kq_submit_packet(pm->priv_queue);
>               else
>                       kq_rollback_packet(pm->priv_queue);
>       }
>
> +     /* If default value is modified, cache that value in dqm->wait_times */
> +     if (cmd == KFD_DEQUEUE_WAIT_INIT)
> +             update_dqm_wait_times(pm->dqm);

Conditionally store the optimized default value for WAIT_INIT only if config_dequeue_wait_counts succeeds.

Also does this permanently leave optimized settings in HW different from initial golden settings?
Not sure if this kind of footprint matters in the end.
I'd assume that anything like a gpu reset, driver reload or dynamic partition change would reset those registers back to non-optimized settings anyways.
Can't really think of a situation where this would be a problem at the moment ...

> +
>  out:
>       mutex_unlock(&pm->lock);
>       return retval;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index d56525201155..9cb21af1d0af 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -297,23 +297,51 @@ static int pm_map_queues_v9(struct packet_manager
> *pm, uint32_t *buffer,
>       return 0;
>  }
>
> -static int pm_set_grace_period_v9(struct packet_manager *pm,
> +static inline void pm_build_dequeue_wait_counts_packet_info(struct
> packet_manager *pm,
> +                     uint32_t sch_value, uint32_t *reg_offset,
> +                     uint32_t *reg_data)
> +{
> +     pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
> +             pm->dqm->dev->adev,
> +             pm->dqm->wait_times,
> +             sch_value,
> +             reg_offset,
> +             reg_data);
> +}
> +
> +static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
>               uint32_t *buffer,
> -             uint32_t grace_period)
> +             enum kfd_config_dequeue_wait_counts_cmd cmd,
> +             uint32_t value)
>  {
>       struct pm4_mec_write_data_mmio *packet;
>       uint32_t reg_offset = 0;
>       uint32_t reg_data = 0;
>
> -     pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
> -                     pm->dqm->dev->adev,
> -                     pm->dqm->wait_times,
> -                     grace_period,
> -                     &reg_offset,
> -                     &reg_data);
> -
> -     if (grace_period == USE_DEFAULT_GRACE_PERIOD)
> +     switch (cmd) {
> +     case KFD_DEQUEUE_WAIT_INIT:
> +             /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
> +             if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev-
> >gmc.is_app_apu &&
> +                (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
> +                     pm_build_dequeue_wait_counts_packet_info(pm, 1,
> &reg_offset, &reg_data);
> +             else
> +                     return 0;
> +             break;
> +     case KFD_DEQUEUE_WAIT_RESET:
> +             /* function called only to get reg_offset */
> +             pm_build_dequeue_wait_counts_packet_info(pm, 0, &reg_offset,
> &reg_data);
>               reg_data = pm->dqm->wait_times;
> +             break;
> +
> +     case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
> +             /* The CP cannot handle value 0 and it will result in
> +              * an infinite grace period being set so set to 1 to prevent this.
> +              */
> +             if (!value)
> +                     value = 1;
> +             pm_build_dequeue_wait_counts_packet_info(pm, value, &reg_offset,
> &reg_data);
> +             break;

Return -EINVAL on default case for safety.

Jon

> +     }
>
>       packet = (struct pm4_mec_write_data_mmio *)buffer;
>       memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
> @@ -415,7 +443,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>       .set_resources          = pm_set_resources_v9,
>       .map_queues             = pm_map_queues_v9,
>       .unmap_queues           = pm_unmap_queues_v9,
> -     .set_grace_period       = pm_set_grace_period_v9,
> +     .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
>       .query_status           = pm_query_status_v9,
>       .release_mem            = NULL,
>       .map_process_size       = sizeof(struct pm4_mes_map_process),
> @@ -423,7 +451,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>       .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>       .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>       .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> -     .set_grace_period_size  = sizeof(struct pm4_mec_write_data_mmio),
> +     .config_dequeue_wait_counts_size  = sizeof(struct
> pm4_mec_write_data_mmio),
>       .query_status_size      = sizeof(struct pm4_mes_query_status),
>       .release_mem_size       = 0,
>  };
> @@ -434,7 +462,7 @@ const struct packet_manager_funcs
> kfd_aldebaran_pm_funcs = {
>       .set_resources          = pm_set_resources_v9,
>       .map_queues             = pm_map_queues_v9,
>       .unmap_queues           = pm_unmap_queues_v9,
> -     .set_grace_period       = pm_set_grace_period_v9,
> +     .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
>       .query_status           = pm_query_status_v9,
>       .release_mem            = NULL,
>       .map_process_size       = sizeof(struct pm4_mes_map_process_aldebaran),
> @@ -442,7 +470,7 @@ const struct packet_manager_funcs
> kfd_aldebaran_pm_funcs = {
>       .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>       .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>       .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> -     .set_grace_period_size  = sizeof(struct pm4_mec_write_data_mmio),
> +     .config_dequeue_wait_counts_size  = sizeof(struct
> pm4_mec_write_data_mmio),
>       .query_status_size      = sizeof(struct pm4_mes_query_status),
>       .release_mem_size       = 0,
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> index 347c86e1c378..a1de5d7e173a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> @@ -304,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
>       .set_resources          = pm_set_resources_vi,
>       .map_queues             = pm_map_queues_vi,
>       .unmap_queues           = pm_unmap_queues_vi,
> -     .set_grace_period       = NULL,
> +     .config_dequeue_wait_counts     = NULL,
>       .query_status           = pm_query_status_vi,
>       .release_mem            = pm_release_mem_vi,
>       .map_process_size       = sizeof(struct pm4_mes_map_process),
> @@ -312,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
>       .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>       .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>       .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> -     .set_grace_period_size  = 0,
> +     .config_dequeue_wait_counts_size        = 0,
>       .query_status_size      = sizeof(struct pm4_mes_query_status),
>       .release_mem_size       = sizeof(struct pm4_mec_release_mem)
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 59619f794b6b..4c8026947a73 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1392,6 +1392,24 @@ int pqm_get_queue_checkpoint_info(struct
> process_queue_manager *pqm,
>  #define KFD_FENCE_COMPLETED (100)
>  #define KFD_FENCE_INIT   (10)
>
> +/**
> + * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring
> + *  dequeue wait counts.
> + *
> + * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for a
> + *   certain ASICs. For these ASICs, this is default value used by RESET
> + * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized
> value
> + *   for certain ASICs. For others set it to default hardware reset value
> + * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait
> + *
> + */
> +enum kfd_config_dequeue_wait_counts_cmd {
> +     KFD_DEQUEUE_WAIT_INIT = 1,
> +     KFD_DEQUEUE_WAIT_RESET = 2,
> +     KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3
> +};
> +
> +
>  struct packet_manager {
>       struct device_queue_manager *dqm;
>       struct kernel_queue *priv_queue;
> @@ -1417,8 +1435,8 @@ struct packet_manager_funcs {
>       int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
>                       enum kfd_unmap_queues_filter mode,
>                       uint32_t filter_param, bool reset);
> -     int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
> -                     uint32_t grace_period);
> +     int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t
> *buffer,
> +                     enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t
> value);
>       int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
>                       uint64_t fence_address, uint64_t fence_value);
>       int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
> @@ -1429,7 +1447,7 @@ struct packet_manager_funcs {
>       int set_resources_size;
>       int map_queues_size;
>       int unmap_queues_size;
> -     int set_grace_period_size;
> +     int config_dequeue_wait_counts_size;
>       int query_status_size;
>       int release_mem_size;
>  };
> @@ -1452,7 +1470,9 @@ int pm_send_unmap_queue(struct packet_manager *pm,
>
>  void pm_release_ib(struct packet_manager *pm);
>
> -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
> +int pm_config_dequeue_wait_counts(struct packet_manager *pm,
> +                     enum kfd_config_dequeue_wait_counts_cmd cmd,
> +                     uint32_t wait_counts_config);
>
>  /* Following PM funcs can be shared among VI and AI */
>  unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
> --
> 2.34.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux