RE: [PATCH v2 1/2] drm/amdkfd: Add pm_config_dequeue_wait_counts API

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[Public]

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Harish
> Kasiviswanathan
> Sent: Monday, March 3, 2025 3:44 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@xxxxxxx>
> Subject: [PATCH v2 1/2] drm/amdkfd: Add pm_config_dequeue_wait_counts API
>
> Update pm_update_grace_period() to more cleaner
> pm_config_dequeue_wait_counts(). Previously, grace_period variable was
> overloaded as a variable and a macro, making it inflexible to configure
> additional dequeue wait times.
>
> pm_config_dequeue_wait_counts() now takes in a cmd / variable. This
> allows flexibility to update different dequeue wait times.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@xxxxxxx>

Reviewed-by: Jonathan Kim <jonathan.kim@xxxxxxx>

> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 ++++++--------
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h | 11 +++-
>  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 26 +++++++-
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c    | 59 ++++++++++++++-----
>  .../drm/amd/amdkfd/kfd_packet_manager_vi.c    |  4 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         | 28 +++++++--
>  6 files changed, 123 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 94b1ac8a4735..cc7465f9562a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -42,6 +42,8 @@
>  /* Size of the per-pipe EOP queue */
>  #define CIK_HPD_EOP_BYTES_LOG2 11
>  #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
> +/* See unmap_queues_cpsch() */
> +#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
>
>  static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
>                                 u32 pasid, unsigned int vmid);
> @@ -1745,10 +1747,7 @@ static int initialize_cpsch(struct device_queue_manager
> *dqm)
>
>       init_sdma_bitmaps(dqm);
>
> -     if (dqm->dev->kfd2kgd->get_iq_wait_times)
> -             dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> -                                     &dqm->wait_times,
> -                                     ffs(dqm->dev->xcc_mask) - 1);
> +     update_dqm_wait_times(dqm);
>       return 0;
>  }
>
> @@ -1844,25 +1843,11 @@ static int start_cpsch(struct device_queue_manager
> *dqm)
>       /* clear hang status when driver try to start the hw scheduler */
>       dqm->sched_running = true;
>
> -     if (!dqm->dev->kfd->shared_resources.enable_mes)
> +     if (!dqm->dev->kfd->shared_resources.enable_mes) {
> +             if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
> +                             KFD_DEQUEUE_WAIT_INIT, 0 /* unused */))
> +                     dev_err(dev, "Setting optimized dequeue wait failed. Using
> default values\n");
>               execute_queues_cpsch(dqm,
> KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
> USE_DEFAULT_GRACE_PERIOD);
> -
> -     /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
> -     if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
> -         (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
> -             uint32_t reg_offset = 0;
> -             uint32_t grace_period = 1;
> -
> -             retval = pm_update_grace_period(&dqm->packet_mgr,
> -                                             grace_period);
> -             if (retval)
> -                     dev_err(dev, "Setting grace timeout failed\n");
> -             else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
> -                     /* Update dqm->wait_times maintained in software */
> -                     dqm->dev->kfd2kgd->build_grace_period_packet_info(
> -                                     dqm->dev->adev, dqm->wait_times,
> -                                     grace_period, &reg_offset,
> -                                     &dqm->wait_times);
>       }
>
>       /* setup per-queue reset detection buffer  */
> @@ -2261,7 +2246,14 @@ static int reset_queues_on_hws_hang(struct
> device_queue_manager *dqm)
>       return r;
>  }
>
> -/* dqm->lock mutex has to be locked before calling this function */
> +/* dqm->lock mutex has to be locked before calling this function
> + *
> + * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time
> + *   for context switch latency. Lower values are used by debugger
> + *   since context switching are triggered at high frequency.
> + *   This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE
> + *
> + */
>  static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>                               enum kfd_unmap_queues_filter filter,
>                               uint32_t filter_param,
> @@ -2280,7 +2272,8 @@ static int unmap_queues_cpsch(struct
> device_queue_manager *dqm,
>               return -EIO;
>
>       if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
> -             retval = pm_update_grace_period(&dqm->packet_mgr,
> grace_period);
> +             retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr,
> +                             KFD_DEQUEUE_WAIT_SET_SCH_WAVE,
> grace_period);
>               if (retval)
>                       goto out;
>       }
> @@ -2324,8 +2317,8 @@ static int unmap_queues_cpsch(struct
> device_queue_manager *dqm,
>
>       /* We need to reset the grace period value for this device */
>       if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
> -             if (pm_update_grace_period(&dqm->packet_mgr,
> -                                     USE_DEFAULT_GRACE_PERIOD))
> +             if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
> +                             KFD_DEQUEUE_WAIT_RESET, 0 /* unused */))
>                       dev_err(dev, "Failed to reset grace period\n");
>       }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 09ab36f8e8c6..917717cfe9c5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -37,7 +37,6 @@
>
>  #define KFD_MES_PROCESS_QUANTUM              100000
>  #define KFD_MES_GANG_QUANTUM         10000
> -#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
>
>  struct device_process_node {
>       struct qcm_process_device *qpd;
> @@ -359,4 +358,14 @@ static inline int read_sdma_queue_counter(uint64_t __user
> *q_rptr, uint64_t *val
>       /* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
>       return get_user(*val, q_rptr + 1);
>  }
> +
> +static inline void update_dqm_wait_times(struct device_queue_manager *dqm)
> +{
> +     if (dqm->dev->kfd2kgd->get_iq_wait_times)
> +             dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> +                                     &dqm->wait_times,
> +                                     ffs(dqm->dev->xcc_mask) - 1);
> +}
> +
> +
>  #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 4984b41cd372..3f574d82b5fc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -396,14 +396,29 @@ int pm_send_query_status(struct packet_manager *pm,
> uint64_t fence_address,
>       return retval;
>  }
>
> -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
> +/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
> + *  by writing to CP_IQ_WAIT_TIME2 registers.
> + *
> + *  @cmd: See emum kfd_config_dequeue_wait_counts_cmd definition
> + *  @value: Depends on the cmd. This parameter is unused for
> + *    KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
> + *    KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set
> + *
> + */
> +int pm_config_dequeue_wait_counts(struct packet_manager *pm,
> +             enum kfd_config_dequeue_wait_counts_cmd cmd,
> +             uint32_t value)
>  {
>       struct kfd_node *node = pm->dqm->dev;
>       struct device *dev = node->adev->dev;
>       int retval = 0;
>       uint32_t *buffer, size;
>
> -     size = pm->pmf->set_grace_period_size;
> +     if (!pm->pmf->config_dequeue_wait_counts ||
> +         !pm->pmf->config_dequeue_wait_counts_size)
> +             return 0;
> +
> +     size = pm->pmf->config_dequeue_wait_counts_size;
>
>       mutex_lock(&pm->lock);
>
> @@ -419,13 +434,18 @@ int pm_update_grace_period(struct packet_manager
> *pm, uint32_t grace_period)
>                       goto out;
>               }
>
> -             retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
> +             retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
> +                                                          cmd, value);
>               if (!retval)
>                       retval = kq_submit_packet(pm->priv_queue);
>               else
>                       kq_rollback_packet(pm->priv_queue);
>       }
>
> +     /* If default value is modified, cache that value in dqm->wait_times */
> +     if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
> +             update_dqm_wait_times(pm->dqm);
> +
>  out:
>       mutex_unlock(&pm->lock);
>       return retval;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index d56525201155..b9c611b249e6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -297,23 +297,54 @@ static int pm_map_queues_v9(struct packet_manager
> *pm, uint32_t *buffer,
>       return 0;
>  }
>
> -static int pm_set_grace_period_v9(struct packet_manager *pm,
> +static inline void pm_build_dequeue_wait_counts_packet_info(struct
> packet_manager *pm,
> +                     uint32_t sch_value, uint32_t *reg_offset,
> +                     uint32_t *reg_data)
> +{
> +     pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
> +             pm->dqm->dev->adev,
> +             pm->dqm->wait_times,
> +             sch_value,
> +             reg_offset,
> +             reg_data);
> +}
> +
> +static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
>               uint32_t *buffer,
> -             uint32_t grace_period)
> +             enum kfd_config_dequeue_wait_counts_cmd cmd,
> +             uint32_t value)
>  {
>       struct pm4_mec_write_data_mmio *packet;
>       uint32_t reg_offset = 0;
>       uint32_t reg_data = 0;
>
> -     pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
> -                     pm->dqm->dev->adev,
> -                     pm->dqm->wait_times,
> -                     grace_period,
> -                     &reg_offset,
> -                     &reg_data);
> -
> -     if (grace_period == USE_DEFAULT_GRACE_PERIOD)
> +     switch (cmd) {
> +     case KFD_DEQUEUE_WAIT_INIT:
> +             /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
> +             if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev-
> >gmc.is_app_apu &&
> +                (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
> +                     pm_build_dequeue_wait_counts_packet_info(pm, 1,
> &reg_offset, &reg_data);
> +             else
> +                     return 0;
> +             break;
> +     case KFD_DEQUEUE_WAIT_RESET:
> +             /* function called only to get reg_offset */
> +             pm_build_dequeue_wait_counts_packet_info(pm, 0, &reg_offset,
> &reg_data);
>               reg_data = pm->dqm->wait_times;
> +             break;
> +
> +     case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
> +             /* The CP cannot handle value 0 and it will result in
> +              * an infinite grace period being set so set to 1 to prevent this.
> +              */
> +             if (!value)
> +                     value = 1;
> +             pm_build_dequeue_wait_counts_packet_info(pm, value, &reg_offset,
> &reg_data);
> +             break;
> +     default:
> +             pr_err("Invalid dequeue wait cmd\n");
> +             return -EINVAL;
> +     }
>
>       packet = (struct pm4_mec_write_data_mmio *)buffer;
>       memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
> @@ -415,7 +446,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>       .set_resources          = pm_set_resources_v9,
>       .map_queues             = pm_map_queues_v9,
>       .unmap_queues           = pm_unmap_queues_v9,
> -     .set_grace_period       = pm_set_grace_period_v9,
> +     .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
>       .query_status           = pm_query_status_v9,
>       .release_mem            = NULL,
>       .map_process_size       = sizeof(struct pm4_mes_map_process),
> @@ -423,7 +454,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>       .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>       .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>       .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> -     .set_grace_period_size  = sizeof(struct pm4_mec_write_data_mmio),
> +     .config_dequeue_wait_counts_size  = sizeof(struct
> pm4_mec_write_data_mmio),
>       .query_status_size      = sizeof(struct pm4_mes_query_status),
>       .release_mem_size       = 0,
>  };
> @@ -434,7 +465,7 @@ const struct packet_manager_funcs
> kfd_aldebaran_pm_funcs = {
>       .set_resources          = pm_set_resources_v9,
>       .map_queues             = pm_map_queues_v9,
>       .unmap_queues           = pm_unmap_queues_v9,
> -     .set_grace_period       = pm_set_grace_period_v9,
> +     .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
>       .query_status           = pm_query_status_v9,
>       .release_mem            = NULL,
>       .map_process_size       = sizeof(struct pm4_mes_map_process_aldebaran),
> @@ -442,7 +473,7 @@ const struct packet_manager_funcs
> kfd_aldebaran_pm_funcs = {
>       .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>       .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>       .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> -     .set_grace_period_size  = sizeof(struct pm4_mec_write_data_mmio),
> +     .config_dequeue_wait_counts_size  = sizeof(struct
> pm4_mec_write_data_mmio),
>       .query_status_size      = sizeof(struct pm4_mes_query_status),
>       .release_mem_size       = 0,
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> index 347c86e1c378..a1de5d7e173a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> @@ -304,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
>       .set_resources          = pm_set_resources_vi,
>       .map_queues             = pm_map_queues_vi,
>       .unmap_queues           = pm_unmap_queues_vi,
> -     .set_grace_period       = NULL,
> +     .config_dequeue_wait_counts     = NULL,
>       .query_status           = pm_query_status_vi,
>       .release_mem            = pm_release_mem_vi,
>       .map_process_size       = sizeof(struct pm4_mes_map_process),
> @@ -312,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
>       .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>       .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>       .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> -     .set_grace_period_size  = 0,
> +     .config_dequeue_wait_counts_size        = 0,
>       .query_status_size      = sizeof(struct pm4_mes_query_status),
>       .release_mem_size       = sizeof(struct pm4_mec_release_mem)
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 59619f794b6b..4c8026947a73 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1392,6 +1392,24 @@ int pqm_get_queue_checkpoint_info(struct
> process_queue_manager *pqm,
>  #define KFD_FENCE_COMPLETED (100)
>  #define KFD_FENCE_INIT   (10)
>
> +/**
> + * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring
> + *  dequeue wait counts.
> + *
> + * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for a
> + *   certain ASICs. For these ASICs, this is default value used by RESET
> + * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized
> value
> + *   for certain ASICs. For others set it to default hardware reset value
> + * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait
> + *
> + */
> +enum kfd_config_dequeue_wait_counts_cmd {
> +     KFD_DEQUEUE_WAIT_INIT = 1,
> +     KFD_DEQUEUE_WAIT_RESET = 2,
> +     KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3
> +};
> +
> +
>  struct packet_manager {
>       struct device_queue_manager *dqm;
>       struct kernel_queue *priv_queue;
> @@ -1417,8 +1435,8 @@ struct packet_manager_funcs {
>       int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
>                       enum kfd_unmap_queues_filter mode,
>                       uint32_t filter_param, bool reset);
> -     int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
> -                     uint32_t grace_period);
> +     int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t
> *buffer,
> +                     enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t
> value);
>       int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
>                       uint64_t fence_address, uint64_t fence_value);
>       int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
> @@ -1429,7 +1447,7 @@ struct packet_manager_funcs {
>       int set_resources_size;
>       int map_queues_size;
>       int unmap_queues_size;
> -     int set_grace_period_size;
> +     int config_dequeue_wait_counts_size;
>       int query_status_size;
>       int release_mem_size;
>  };
> @@ -1452,7 +1470,9 @@ int pm_send_unmap_queue(struct packet_manager *pm,
>
>  void pm_release_ib(struct packet_manager *pm);
>
> -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
> +int pm_config_dequeue_wait_counts(struct packet_manager *pm,
> +                     enum kfd_config_dequeue_wait_counts_cmd cmd,
> +                     uint32_t wait_counts_config);
>
>  /* Following PM funcs can be shared among VI and AI */
>  unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
> --
> 2.34.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux