[Public]

> -----Original Message-----
> From: Huang, JinHuiEric <JinHuiEric.Huang@xxxxxxx>
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; Huang, JinHuiEric <JinHuiEric.Huang@xxxxxxx>
> Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
> Each xcc instance needs to get its iq wait time and set its grace
> period accordingly.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@xxxxxxx>
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 ++++--
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
>  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++++++++++--------
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c    |  9 +++---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
>  5 files changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index a2bff3f01359..0f12c1989e14 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct device_queue_manager *dqm)
>
>  static int initialize_cpsch(struct device_queue_manager *dqm)
>  {
> +	uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> +
>  	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
>
>  	mutex_init(&dqm->lock_hidden);
> @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
>  	init_sdma_bitmaps(dqm);
>
>  	if (dqm->dev->kfd2kgd->get_iq_wait_times)
> -		dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> -			&dqm->wait_times, 0);
> +		for_each_inst(xcc_id, xcc_mask)
> +			dqm->dev->kfd2kgd->get_iq_wait_times(
> +					dqm->dev->adev,
> +					&dqm->wait_times[xcc_id],
> +					xcc_id);
>  	return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 7dd4b177219d..62a6dc8d3032 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -262,7 +262,7 @@ struct device_queue_manager {
>  	/* used for GFX 9.4.3 only */
>  	uint32_t		current_logical_xcc_start;
>
> -	uint32_t		wait_times;
> +	uint32_t		wait_times[32];

I think wait_times[16] should be sufficient. We only ever take the
hamming weight of a 16-bit mask for NUM_XCC, and I believe xcc_mask is
declared as a uint16_t on the KGD side anyway, so we may as well align
to that.
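i.e. something like this (an untested sketch, assuming the KGD-side
xcc_mask really does stay 16 bits wide):

	/* one set of IQ wait times per XCC instance; the KGD xcc_mask
	 * is a uint16_t, so 16 entries cover every possible instance
	 */
	uint32_t		wait_times[16];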
>
>  	wait_queue_head_t	destroy_wait;
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 401096c103b2..f37ab4b6d88c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -374,27 +374,31 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
>  {
>  	int retval = 0;
>  	uint32_t *buffer, size;
> +	uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
>
>  	size = pm->pmf->set_grace_period_size;
>
>  	mutex_lock(&pm->lock);
>
>  	if (size) {
> -		kq_acquire_packet_buffer(pm->priv_queue,
> -			size / sizeof(uint32_t),
> -			(unsigned int **)&buffer);
> -
> -		if (!buffer) {
> -			pr_err("Failed to allocate buffer on kernel queue\n");
> -			retval = -ENOMEM;
> -			goto out;
> -		}
> +		for_each_inst(xcc_id, xcc_mask) {
> +			kq_acquire_packet_buffer(pm->priv_queue,
> +					size / sizeof(uint32_t),
> +					(unsigned int **)&buffer);
>
> -		retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
> -		if (!retval)
> -			kq_submit_packet(pm->priv_queue);
> -		else
> -			kq_rollback_packet(pm->priv_queue);
> +			if (!buffer) {
> +				pr_err("Failed to allocate buffer on kernel queue\n");
> +				retval = -ENOMEM;
> +				goto out;
> +			}
> +
> +			retval = pm->pmf->set_grace_period(pm, buffer,
> +					grace_period, xcc_id);
> +			if (!retval)
> +				kq_submit_packet(pm->priv_queue);
> +			else
> +				kq_rollback_packet(pm->priv_queue);

In the event of partial success, do we need to roll back (i.e. resubmit
the default grace period) on the instances that were already updated?
I believe the default grace period is put in place for better CWSR
performance in normal mode, so leaving the fast-preemption setting
behind on failure could impact performance. A rough sketch of what I
mean is at the end of this mail.

Thanks,

Jon

> +		}
>  	}
>
>  out:
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index 8fda16e6fee6..a9443d661957 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -287,7 +287,8 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
>
>  static int pm_set_grace_period_v9(struct packet_manager *pm,
>  		uint32_t *buffer,
> -		uint32_t grace_period)
> +		uint32_t grace_period,
> +		uint32_t inst)
>  {
>  	struct pm4_mec_write_data_mmio *packet;
>  	uint32_t reg_offset = 0;
>
> @@ -295,14 +296,14 @@ static int pm_set_grace_period_v9(struct packet_manager *pm,
>
>  	pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
>  			pm->dqm->dev->adev,
> -			pm->dqm->wait_times,
> +			pm->dqm->wait_times[inst],
>  			grace_period,
>  			&reg_offset,
>  			&reg_data,
> -			0);
> +			inst);
>
>  	if (grace_period == USE_DEFAULT_GRACE_PERIOD)
> -		reg_data = pm->dqm->wait_times;
> +		reg_data = pm->dqm->wait_times[inst];
>
>  	packet = (struct pm4_mec_write_data_mmio *)buffer;
>  	memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index d4c9ee3f9953..22c4a403ddd7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1400,7 +1400,7 @@ struct packet_manager_funcs {
>  			enum kfd_unmap_queues_filter mode,
>  			uint32_t filter_param, bool reset);
>  	int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
> -			uint32_t grace_period);
> +			uint32_t grace_period, uint32_t inst);
>  	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
>  			uint64_t fence_address, uint64_t fence_value);
>  	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
> --
> 2.34.1
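To illustrate the rollback idea above: a rough, untested sketch of a
failure path in pm_update_grace_period() on top of this patch. The
undo_id and undo_buf locals are hypothetical names, xcc_id is the
instance that just failed, and secondary failures are simply dropped:

	/* best-effort restore of the default grace period on the
	 * instances updated before the failing one, so a partial
	 * update does not leave a tighter preemption setting behind
	 */
	for_each_inst(undo_id, xcc_mask) {
		uint32_t *undo_buf;

		if (undo_id == xcc_id)	/* reached the failed instance */
			break;
		kq_acquire_packet_buffer(pm->priv_queue,
				size / sizeof(uint32_t),
				(unsigned int **)&undo_buf);
		if (!undo_buf)
			break;		/* give up on secondary failure */
		if (!pm->pmf->set_grace_period(pm, undo_buf,
				USE_DEFAULT_GRACE_PERIOD, undo_id))
			kq_submit_packet(pm->priv_queue);
		else
			kq_rollback_packet(pm->priv_queue);
	}

Skipping the undo loop entirely when the caller asked for
USE_DEFAULT_GRACE_PERIOD in the first place would also make sense,
since there is nothing to restore in that case.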