Update pm_update_grace_period() to more cleaner pm_config_dequeue_wait_counts(). Previously, grace_period variable was overloaded as a variable and a macro, making it inflexible to configure additional dequeue wait times. pm_config_dequeue_wait_counts() now takes in a cmd / variable. This allows flexibility to update different dequeue wait times. Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@xxxxxxx> --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 +++++++-------- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 11 +++- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 26 ++++++++- .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 56 ++++++++++++++----- .../drm/amd/amdkfd/kfd_packet_manager_vi.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 ++++++++-- 6 files changed, 120 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 94b1ac8a4735..cc7465f9562a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -42,6 +42,8 @@ /* Size of the per-pipe EOP queue */ #define CIK_HPD_EOP_BYTES_LOG2 11 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) +/* See unmap_queues_cpsch() */ +#define USE_DEFAULT_GRACE_PERIOD 0xffffffff static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, unsigned int vmid); @@ -1745,10 +1747,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) init_sdma_bitmaps(dqm); - if (dqm->dev->kfd2kgd->get_iq_wait_times) - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, - &dqm->wait_times, - ffs(dqm->dev->xcc_mask) - 1); + update_dqm_wait_times(dqm); return 0; } @@ -1844,25 +1843,11 @@ static int start_cpsch(struct device_queue_manager *dqm) /* clear hang status when driver try to start the hw scheduler */ dqm->sched_running = true; - if (!dqm->dev->kfd->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) { + if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, + KFD_DEQUEUE_WAIT_INIT, 0 /* unused */)) + dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n"); execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); - - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ - if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu && - (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) { - uint32_t reg_offset = 0; - uint32_t grace_period = 1; - - retval = pm_update_grace_period(&dqm->packet_mgr, - grace_period); - if (retval) - dev_err(dev, "Setting grace timeout failed\n"); - else if (dqm->dev->kfd2kgd->build_grace_period_packet_info) - /* Update dqm->wait_times maintained in software */ - dqm->dev->kfd2kgd->build_grace_period_packet_info( - dqm->dev->adev, dqm->wait_times, - grace_period, ®_offset, - &dqm->wait_times); } /* setup per-queue reset detection buffer */ @@ -2261,7 +2246,14 @@ static int reset_queues_on_hws_hang(struct device_queue_manager *dqm) return r; } -/* dqm->lock mutex has to be locked before calling this function */ +/* dqm->lock mutex has to be locked before calling this function + * + * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time + * for context switch latency. Lower values are used by debugger + * since context switching are triggered at high frequency. + * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE + * + */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, uint32_t filter_param, @@ -2280,7 +2272,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, return -EIO; if (grace_period != USE_DEFAULT_GRACE_PERIOD) { - retval = pm_update_grace_period(&dqm->packet_mgr, grace_period); + retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr, + KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period); if (retval) goto out; } @@ -2324,8 +2317,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, /* We need to reset the grace period value for this device */ if (grace_period != USE_DEFAULT_GRACE_PERIOD) { - if (pm_update_grace_period(&dqm->packet_mgr, - USE_DEFAULT_GRACE_PERIOD)) + if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, + KFD_DEQUEUE_WAIT_RESET, 0 /* unused */)) dev_err(dev, "Failed to reset grace period\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 09ab36f8e8c6..917717cfe9c5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -37,7 +37,6 @@ #define KFD_MES_PROCESS_QUANTUM 100000 #define KFD_MES_GANG_QUANTUM 10000 -#define USE_DEFAULT_GRACE_PERIOD 0xffffffff struct device_process_node { struct qcm_process_device *qpd; @@ -359,4 +358,14 @@ static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val /* SDMA activity counter is stored at queue's RPTR + 0x8 location. */ return get_user(*val, q_rptr + 1); } + +static inline void update_dqm_wait_times(struct device_queue_manager *dqm) +{ + if (dqm->dev->kfd2kgd->get_iq_wait_times) + dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, + &dqm->wait_times, + ffs(dqm->dev->xcc_mask) - 1); +} + + #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 4984b41cd372..dd19ae40f0ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -396,14 +396,29 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, return retval; } -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) +/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts + * by writing to CP_IQ_WAIT_TIME2 registers. + * + * @cmd: See emum kfd_config_dequeue_wait_counts_cmd definition + * @value: Depends on the cmd. This parameter is unused for + * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For + * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set + * + */ +int pm_config_dequeue_wait_counts(struct packet_manager *pm, + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t value) { struct kfd_node *node = pm->dqm->dev; struct device *dev = node->adev->dev; int retval = 0; uint32_t *buffer, size; - size = pm->pmf->set_grace_period_size; + if (!pm->pmf->config_dequeue_wait_counts || + !pm->pmf->config_dequeue_wait_counts_size) + return 0; + + size = pm->pmf->config_dequeue_wait_counts_size; mutex_lock(&pm->lock); @@ -419,13 +434,18 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) goto out; } - retval = pm->pmf->set_grace_period(pm, buffer, grace_period); + retval = pm->pmf->config_dequeue_wait_counts(pm, buffer, + cmd, value); if (!retval) retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); } + /* If default value is modified, cache that value in dqm->wait_times */ + if (cmd == KFD_DEQUEUE_WAIT_INIT) + update_dqm_wait_times(pm->dqm); + out: mutex_unlock(&pm->lock); return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index d56525201155..9cb21af1d0af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -297,23 +297,51 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, return 0; } -static int pm_set_grace_period_v9(struct packet_manager *pm, +static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm, + uint32_t sch_value, uint32_t *reg_offset, + uint32_t *reg_data) +{ + pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( + pm->dqm->dev->adev, + pm->dqm->wait_times, + sch_value, + reg_offset, + reg_data); +} + +static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, uint32_t *buffer, - uint32_t grace_period) + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t value) { struct pm4_mec_write_data_mmio *packet; uint32_t reg_offset = 0; uint32_t reg_data = 0; - pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( - pm->dqm->dev->adev, - pm->dqm->wait_times, - grace_period, - ®_offset, - ®_data); - - if (grace_period == USE_DEFAULT_GRACE_PERIOD) + switch (cmd) { + case KFD_DEQUEUE_WAIT_INIT: + /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) + pm_build_dequeue_wait_counts_packet_info(pm, 1, ®_offset, ®_data); + else + return 0; + break; + case KFD_DEQUEUE_WAIT_RESET: + /* function called only to get reg_offset */ + pm_build_dequeue_wait_counts_packet_info(pm, 0, ®_offset, ®_data); reg_data = pm->dqm->wait_times; + break; + + case KFD_DEQUEUE_WAIT_SET_SCH_WAVE: + /* The CP cannot handle value 0 and it will result in + * an infinite grace period being set so set to 1 to prevent this. + */ + if (!value) + value = 1; + pm_build_dequeue_wait_counts_packet_info(pm, value, ®_offset, ®_data); + break; + } packet = (struct pm4_mec_write_data_mmio *)buffer; memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); @@ -415,7 +443,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, - .set_grace_period = pm_set_grace_period_v9, + .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process), @@ -423,7 +451,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), + .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; @@ -434,7 +462,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, - .set_grace_period = pm_set_grace_period_v9, + .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), @@ -442,7 +470,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), + .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c index 347c86e1c378..a1de5d7e173a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c @@ -304,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { .set_resources = pm_set_resources_vi, .map_queues = pm_map_queues_vi, .unmap_queues = pm_unmap_queues_vi, - .set_grace_period = NULL, + .config_dequeue_wait_counts = NULL, .query_status = pm_query_status_vi, .release_mem = pm_release_mem_vi, .map_process_size = sizeof(struct pm4_mes_map_process), @@ -312,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .set_grace_period_size = 0, + .config_dequeue_wait_counts_size = 0, .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = sizeof(struct pm4_mec_release_mem) }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 59619f794b6b..4c8026947a73 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1392,6 +1392,24 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm, #define KFD_FENCE_COMPLETED (100) #define KFD_FENCE_INIT (10) +/** + * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring + * dequeue wait counts. + * + * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for a + * certain ASICs. For these ASICs, this is default value used by RESET + * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized value + * for certain ASICs. For others set it to default hardware reset value + * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait + * + */ +enum kfd_config_dequeue_wait_counts_cmd { + KFD_DEQUEUE_WAIT_INIT = 1, + KFD_DEQUEUE_WAIT_RESET = 2, + KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3 +}; + + struct packet_manager { struct device_queue_manager *dqm; struct kernel_queue *priv_queue; @@ -1417,8 +1435,8 @@ struct packet_manager_funcs { int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, enum kfd_unmap_queues_filter mode, uint32_t filter_param, bool reset); - int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer, - uint32_t grace_period); + int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t *buffer, + enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t value); int (*query_status)(struct packet_manager *pm, uint32_t *buffer, uint64_t fence_address, uint64_t fence_value); int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); @@ -1429,7 +1447,7 @@ struct packet_manager_funcs { int set_resources_size; int map_queues_size; int unmap_queues_size; - int set_grace_period_size; + int config_dequeue_wait_counts_size; int query_status_size; int release_mem_size; }; @@ -1452,7 +1470,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, void pm_release_ib(struct packet_manager *pm); -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period); +int pm_config_dequeue_wait_counts(struct packet_manager *pm, + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t wait_counts_config); /* Following PM funcs can be shared among VI and AI */ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); -- 2.34.1