[Public] > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Harish > Kasiviswanathan > Sent: Monday, March 3, 2025 3:44 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@xxxxxxx> > Subject: [PATCH v2 1/2] drm/amdkfd: Add pm_config_dequeue_wait_counts API > > Update pm_update_grace_period() to the cleaner > pm_config_dequeue_wait_counts(). Previously, grace_period variable was > overloaded as a variable and a macro, making it inflexible to configure > additional dequeue wait times. > > pm_config_dequeue_wait_counts() now takes in a cmd / variable. This > allows flexibility to update different dequeue wait times. > > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@xxxxxxx> Reviewed-by: Jonathan Kim <jonathan.kim@xxxxxxx> > --- > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 ++++++-------- > .../drm/amd/amdkfd/kfd_device_queue_manager.h | 11 +++- > .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 26 +++++++- > .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 59 ++++++++++++++----- > .../drm/amd/amdkfd/kfd_packet_manager_vi.c | 4 +- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 +++++++-- > 6 files changed, 123 insertions(+), 50 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 94b1ac8a4735..cc7465f9562a 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -42,6 +42,8 @@ > /* Size of the per-pipe EOP queue */ > #define CIK_HPD_EOP_BYTES_LOG2 11 > #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) > +/* See unmap_queues_cpsch() */ > +#define USE_DEFAULT_GRACE_PERIOD 0xffffffff > > static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, > u32 pasid, unsigned int vmid); > @@ -1745,10 +1747,7 @@ static int initialize_cpsch(struct device_queue_manager > *dqm) > > 
init_sdma_bitmaps(dqm); > > - if (dqm->dev->kfd2kgd->get_iq_wait_times) > - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, > - &dqm->wait_times, > - ffs(dqm->dev->xcc_mask) - 1); > + update_dqm_wait_times(dqm); > return 0; > } > > @@ -1844,25 +1843,11 @@ static int start_cpsch(struct device_queue_manager > *dqm) > /* clear hang status when driver try to start the hw scheduler */ > dqm->sched_running = true; > > - if (!dqm->dev->kfd->shared_resources.enable_mes) > + if (!dqm->dev->kfd->shared_resources.enable_mes) { > + if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, > + KFD_DEQUEUE_WAIT_INIT, 0 /* unused */)) > + dev_err(dev, "Setting optimized dequeue wait failed. Using > default values\n"); > execute_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > USE_DEFAULT_GRACE_PERIOD); > - > - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ > - if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu && > - (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) { > - uint32_t reg_offset = 0; > - uint32_t grace_period = 1; > - > - retval = pm_update_grace_period(&dqm->packet_mgr, > - grace_period); > - if (retval) > - dev_err(dev, "Setting grace timeout failed\n"); > - else if (dqm->dev->kfd2kgd->build_grace_period_packet_info) > - /* Update dqm->wait_times maintained in software */ > - dqm->dev->kfd2kgd->build_grace_period_packet_info( > - dqm->dev->adev, dqm->wait_times, > - grace_period, &reg_offset, > - &dqm->wait_times); > } > > /* setup per-queue reset detection buffer */ > @@ -2261,7 +2246,14 @@ static int reset_queues_on_hws_hang(struct > device_queue_manager *dqm) > return r; > } > > -/* dqm->lock mutex has to be locked before calling this function */ > +/* dqm->lock mutex has to be locked before calling this function > + * > + * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time > + * for context switch latency. Lower values are used by debugger > + * since context switches are triggered at high frequency. 
> + * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE > + * > + */ > static int unmap_queues_cpsch(struct device_queue_manager *dqm, > enum kfd_unmap_queues_filter filter, > uint32_t filter_param, > @@ -2280,7 +2272,8 @@ static int unmap_queues_cpsch(struct > device_queue_manager *dqm, > return -EIO; > > if (grace_period != USE_DEFAULT_GRACE_PERIOD) { > - retval = pm_update_grace_period(&dqm->packet_mgr, > grace_period); > + retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr, > + KFD_DEQUEUE_WAIT_SET_SCH_WAVE, > grace_period); > if (retval) > goto out; > } > @@ -2324,8 +2317,8 @@ static int unmap_queues_cpsch(struct > device_queue_manager *dqm, > > /* We need to reset the grace period value for this device */ > if (grace_period != USE_DEFAULT_GRACE_PERIOD) { > - if (pm_update_grace_period(&dqm->packet_mgr, > - USE_DEFAULT_GRACE_PERIOD)) > + if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, > + KFD_DEQUEUE_WAIT_RESET, 0 /* unused */)) > dev_err(dev, "Failed to reset grace period\n"); > } > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > index 09ab36f8e8c6..917717cfe9c5 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > @@ -37,7 +37,6 @@ > > #define KFD_MES_PROCESS_QUANTUM 100000 > #define KFD_MES_GANG_QUANTUM 10000 > -#define USE_DEFAULT_GRACE_PERIOD 0xffffffff > > struct device_process_node { > struct qcm_process_device *qpd; > @@ -359,4 +358,14 @@ static inline int read_sdma_queue_counter(uint64_t __user > *q_rptr, uint64_t *val > /* SDMA activity counter is stored at queue's RPTR + 0x8 location. 
*/ > return get_user(*val, q_rptr + 1); > } > + > +static inline void update_dqm_wait_times(struct device_queue_manager *dqm) > +{ > + if (dqm->dev->kfd2kgd->get_iq_wait_times) > + dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, > + &dqm->wait_times, > + ffs(dqm->dev->xcc_mask) - 1); > +} > + > + > #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > index 4984b41cd372..3f574d82b5fc 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > @@ -396,14 +396,29 @@ int pm_send_query_status(struct packet_manager *pm, > uint64_t fence_address, > return retval; > } > > -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) > +/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts > + * by writing to CP_IQ_WAIT_TIME2 registers. > + * > + * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition > + * @value: Depends on the cmd. This parameter is unused for > + * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. 
For > + * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set > + * > + */ > +int pm_config_dequeue_wait_counts(struct packet_manager *pm, > + enum kfd_config_dequeue_wait_counts_cmd cmd, > + uint32_t value) > { > struct kfd_node *node = pm->dqm->dev; > struct device *dev = node->adev->dev; > int retval = 0; > uint32_t *buffer, size; > > - size = pm->pmf->set_grace_period_size; > + if (!pm->pmf->config_dequeue_wait_counts || > + !pm->pmf->config_dequeue_wait_counts_size) > + return 0; > + > + size = pm->pmf->config_dequeue_wait_counts_size; > > mutex_lock(&pm->lock); > > @@ -419,13 +434,18 @@ int pm_update_grace_period(struct packet_manager > *pm, uint32_t grace_period) > goto out; > } > > - retval = pm->pmf->set_grace_period(pm, buffer, grace_period); > + retval = pm->pmf->config_dequeue_wait_counts(pm, buffer, > + cmd, value); > if (!retval) > retval = kq_submit_packet(pm->priv_queue); > else > kq_rollback_packet(pm->priv_queue); > } > > + /* If default value is modified, cache that value in dqm->wait_times */ > + if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) > + update_dqm_wait_times(pm->dqm); > + > out: > mutex_unlock(&pm->lock); > return retval; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > index d56525201155..b9c611b249e6 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > @@ -297,23 +297,54 @@ static int pm_map_queues_v9(struct packet_manager > *pm, uint32_t *buffer, > return 0; > } > > -static int pm_set_grace_period_v9(struct packet_manager *pm, > +static inline void pm_build_dequeue_wait_counts_packet_info(struct > packet_manager *pm, > + uint32_t sch_value, uint32_t *reg_offset, > + uint32_t *reg_data) > +{ > + pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( > + pm->dqm->dev->adev, > + pm->dqm->wait_times, > + sch_value, > + reg_offset, > + reg_data); > +} > + > +static int 
pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, > uint32_t *buffer, > - uint32_t grace_period) > + enum kfd_config_dequeue_wait_counts_cmd cmd, > + uint32_t value) > { > struct pm4_mec_write_data_mmio *packet; > uint32_t reg_offset = 0; > uint32_t reg_data = 0; > > - pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( > - pm->dqm->dev->adev, > - pm->dqm->wait_times, > - grace_period, > - &reg_offset, > - &reg_data); > - > - if (grace_period == USE_DEFAULT_GRACE_PERIOD) > + switch (cmd) { > + case KFD_DEQUEUE_WAIT_INIT: > + /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ > + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev- > >gmc.is_app_apu && > + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) > + pm_build_dequeue_wait_counts_packet_info(pm, 1, > &reg_offset, &reg_data); > + else > + return 0; > + break; > + case KFD_DEQUEUE_WAIT_RESET: > + /* function called only to get reg_offset */ > + pm_build_dequeue_wait_counts_packet_info(pm, 0, &reg_offset, > &reg_data); > reg_data = pm->dqm->wait_times; > + break; > + > + case KFD_DEQUEUE_WAIT_SET_SCH_WAVE: > + /* The CP cannot handle value 0 and it will result in > + * an infinite grace period being set so set to 1 to prevent this. 
> + */ > + if (!value) > + value = 1; > + pm_build_dequeue_wait_counts_packet_info(pm, value, &reg_offset, > &reg_data); > + break; > + default: > + pr_err("Invalid dequeue wait cmd\n"); > + return -EINVAL; > + } > > packet = (struct pm4_mec_write_data_mmio *)buffer; > memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); > @@ -415,7 +446,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { > .set_resources = pm_set_resources_v9, > .map_queues = pm_map_queues_v9, > .unmap_queues = pm_unmap_queues_v9, > - .set_grace_period = pm_set_grace_period_v9, > + .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, > .query_status = pm_query_status_v9, > .release_mem = NULL, > .map_process_size = sizeof(struct pm4_mes_map_process), > @@ -423,7 +454,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { > .set_resources_size = sizeof(struct pm4_mes_set_resources), > .map_queues_size = sizeof(struct pm4_mes_map_queues), > .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), > - .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), > + .config_dequeue_wait_counts_size = sizeof(struct > pm4_mec_write_data_mmio), > .query_status_size = sizeof(struct pm4_mes_query_status), > .release_mem_size = 0, > }; > @@ -434,7 +465,7 @@ const struct packet_manager_funcs > kfd_aldebaran_pm_funcs = { > .set_resources = pm_set_resources_v9, > .map_queues = pm_map_queues_v9, > .unmap_queues = pm_unmap_queues_v9, > - .set_grace_period = pm_set_grace_period_v9, > + .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, > .query_status = pm_query_status_v9, > .release_mem = NULL, > .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), > @@ -442,7 +473,7 @@ const struct packet_manager_funcs > kfd_aldebaran_pm_funcs = { > .set_resources_size = sizeof(struct pm4_mes_set_resources), > .map_queues_size = sizeof(struct pm4_mes_map_queues), > .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), > - .set_grace_period_size = sizeof(struct 
pm4_mec_write_data_mmio), > + .config_dequeue_wait_counts_size = sizeof(struct > pm4_mec_write_data_mmio), > .query_status_size = sizeof(struct pm4_mes_query_status), > .release_mem_size = 0, > }; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c > index 347c86e1c378..a1de5d7e173a 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c > @@ -304,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { > .set_resources = pm_set_resources_vi, > .map_queues = pm_map_queues_vi, > .unmap_queues = pm_unmap_queues_vi, > - .set_grace_period = NULL, > + .config_dequeue_wait_counts = NULL, > .query_status = pm_query_status_vi, > .release_mem = pm_release_mem_vi, > .map_process_size = sizeof(struct pm4_mes_map_process), > @@ -312,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { > .set_resources_size = sizeof(struct pm4_mes_set_resources), > .map_queues_size = sizeof(struct pm4_mes_map_queues), > .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), > - .set_grace_period_size = 0, > + .config_dequeue_wait_counts_size = 0, > .query_status_size = sizeof(struct pm4_mes_query_status), > .release_mem_size = sizeof(struct pm4_mec_release_mem) > }; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 59619f794b6b..4c8026947a73 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -1392,6 +1392,24 @@ int pqm_get_queue_checkpoint_info(struct > process_queue_manager *pqm, > #define KFD_FENCE_COMPLETED (100) > #define KFD_FENCE_INIT (10) > > +/** > + * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring > + * dequeue wait counts. > + * > + * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for a > + * certain ASICs. 
For these ASICs, this is default value used by RESET > + * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized > value > + * for certain ASICs. For others set it to default hardware reset value > + * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait > + * > + */ > +enum kfd_config_dequeue_wait_counts_cmd { > + KFD_DEQUEUE_WAIT_INIT = 1, > + KFD_DEQUEUE_WAIT_RESET = 2, > + KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3 > +}; > + > + > struct packet_manager { > struct device_queue_manager *dqm; > struct kernel_queue *priv_queue; > @@ -1417,8 +1435,8 @@ struct packet_manager_funcs { > int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, > enum kfd_unmap_queues_filter mode, > uint32_t filter_param, bool reset); > - int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer, > - uint32_t grace_period); > + int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t > *buffer, > + enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t > value); > int (*query_status)(struct packet_manager *pm, uint32_t *buffer, > uint64_t fence_address, uint64_t fence_value); > int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); > @@ -1429,7 +1447,7 @@ struct packet_manager_funcs { > int set_resources_size; > int map_queues_size; > int unmap_queues_size; > - int set_grace_period_size; > + int config_dequeue_wait_counts_size; > int query_status_size; > int release_mem_size; > }; > @@ -1452,7 +1470,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, > > void pm_release_ib(struct packet_manager *pm); > > -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period); > +int pm_config_dequeue_wait_counts(struct packet_manager *pm, > + enum kfd_config_dequeue_wait_counts_cmd cmd, > + uint32_t wait_counts_config); > > /* Following PM funcs can be shared among VI and AI */ > unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); > -- > 2.34.1