[AMD Official Use Only] -----Original Message----- From: Joshi, Mukul <Mukul.Joshi@xxxxxxx> Sent: Wednesday, August 11, 2021 10:57 AM To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Cornwall, Jay <Jay.Cornwall@xxxxxxx> Subject: RE: [PATCH] drm/amdkfd: CWSR with software scheduler [AMD Official Use Only] > -----Original Message----- > From: Kasiviswanathan, Harish <Harish.Kasiviswanathan@xxxxxxx> > Sent: Tuesday, August 10, 2021 9:15 PM > To: Joshi, Mukul <Mukul.Joshi@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Cornwall, Jay > <Jay.Cornwall@xxxxxxx>; Joshi, Mukul <Mukul.Joshi@xxxxxxx> > Subject: RE: [PATCH] drm/amdkfd: CWSR with software scheduler > > [AMD Official Use Only] > > Just few comments inline. With that acknowledged Reviewed-by: Harish > Kasiviswanathan <Harish.Kasiviswanathan@xxxxxxx> > > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of > Mukul Joshi > Sent: Monday, August 9, 2021 4:41 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Cornwall, Jay > <Jay.Cornwall@xxxxxxx>; Joshi, Mukul <Mukul.Joshi@xxxxxxx> > Subject: [PATCH] drm/amdkfd: CWSR with software scheduler > > This patch adds support to program trap handler settings when loading > driver with software scheduler (sched_policy=2). > > Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> > Suggested-by: Jay Cornwall <Jay.Cornwall@xxxxxxx> > --- > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 31 +++++++++++++++++ > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 31 +++++++++++++++++ > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 33 > ++++++++++++++++++- .../drm/amd/amdkfd/kfd_device_queue_manager.c | > 20 +++++++++-- > .../gpu/drm/amd/include/kgd_kfd_interface.h | 3 ++ > 5 files changed, 115 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > index 491acdf92f73..960acf68150a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > @@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, > void *mqd, > case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: > type = RESET_WAVES; > break; > + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: > + type = SAVE_WAVES; > + break; > default: > type = DRAIN_PIPE; > break; > @@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct > kgd_dev *kgd, uint32_t vmid, > adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } > > +static void program_trap_handler_settings(struct kgd_dev *kgd, > + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { > + struct amdgpu_device *adev = get_amdgpu_device(kgd); > + > + lock_srbm(kgd, 0, 0, 0, vmid); > + > + /* > + * Program TBA registers > + */ > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), > + lower_32_bits(tba_addr >> 8)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), > + upper_32_bits(tba_addr >> 8) | > + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); > + > + /* > + * Program TMA registers > + */ > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), > + lower_32_bits(tma_addr >> 8)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), > + upper_32_bits(tma_addr >> 8)); > + > + unlock_srbm(kgd); > +} > + > const struct kfd2kgd_calls gfx_v10_kfd2kgd = { > .program_sh_mem_settings = kgd_program_sh_mem_settings, > .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -774,4 > +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { > .get_atc_vmid_pasid_mapping_info = > get_atc_vmid_pasid_mapping_info, > .set_vm_context_page_table_base = > set_vm_context_page_table_base, > + .program_trap_handler_settings = program_trap_handler_settings, > > [HK]: Naming not consistent. program_trap_handler_settings, > program_trap_handler_settings_v10_3 and > kgd_gfx_v9_program_trap_handler_settings > I am following the naming convention for the other functions defined in these files. That's the reason the functions names are different. I don't mind naming them all the same. [HK]: Thanks for clarifying. I am fine with as it is. > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > index 1f5620cc3570..dac0d751d5af 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > @@ -537,6 +537,9 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, > void *mqd, > case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: > type = RESET_WAVES; > break; > + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: > + type = SAVE_WAVES; > + break; > default: > type = DRAIN_PIPE; > break; > @@ -658,6 +661,33 @@ static void > set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v > adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } > > +static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd, > + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { > + struct amdgpu_device *adev = get_amdgpu_device(kgd); > + > + lock_srbm(kgd, 0, 0, 0, vmid); > + > + /* > + * Program TBA registers > + */ > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), > + lower_32_bits(tba_addr >> 8)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), > + upper_32_bits(tba_addr >> 8) | > + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); > + > + /* > + * Program TMA registers > + */ > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), > + lower_32_bits(tma_addr >> 8)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), > + upper_32_bits(tma_addr >> 8)); > + > + unlock_srbm(kgd); > +} > + > #if 0 > uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd, > uint32_t trap_debug_wave_launch_mode, @@ > -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { > .address_watch_get_offset = address_watch_get_offset_v10_3, > .get_atc_vmid_pasid_mapping_info = NULL, > .set_vm_context_page_table_base = > set_vm_context_page_table_base_v10_3, > + .program_trap_handler_settings = > program_trap_handler_settings_v10_3, > #if 0 > .enable_debug_trap = enable_debug_trap_v10_3, > .disable_debug_trap = disable_debug_trap_v10_3, diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > index ed3014fbb563..154244916727 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > @@ -42,7 +42,8 @@ > enum hqd_dequeue_request_type { > NO_ACTION = 0, > DRAIN_PIPE, > - RESET_WAVES > + RESET_WAVES, > + SAVE_WAVES > }; > > static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev > *kgd) @@ -566,6 +567,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev > *kgd, void *mqd, > case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: > type = RESET_WAVES; > break; > + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: > + type = SAVE_WAVES; > + break; > default: > type = DRAIN_PIPE; > break; > @@ -878,6 +882,32 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev > *kgd, int pasid, > adev->gfx.cu_info.max_waves_per_simd; > } > > +static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, > + uint32_t vmid, uint64_t tba_addr, uint64_t > +tma_addr) { > + struct amdgpu_device *adev = get_amdgpu_device(kgd); > + > + lock_srbm(kgd, 0, 0, 0, vmid); > + > + /* > + * Program TBA registers > + */ > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), > + lower_32_bits(tba_addr >> 8)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), > + upper_32_bits(tba_addr >> 8)); > + > + /* > + * Program TMA registers > + */ > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), > + lower_32_bits(tma_addr >> 8)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), > + upper_32_bits(tma_addr >> 8)); > + > + unlock_srbm(kgd); > +} > + > const struct kfd2kgd_calls gfx_v9_kfd2kgd = { > .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, > .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, > @@ -899,4 +929,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { > kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, > .set_vm_context_page_table_base = > kgd_gfx_v9_set_vm_context_page_table_base, > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, > + .program_trap_handler_settings = > +kgd_gfx_v9_program_trap_handler_settings, > }; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index a972ef5eae68..6fd6b2248992 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -211,6 +211,14 @@ static void deallocate_doorbell(struct > qcm_process_device *qpd, > WARN_ON(!old); > } > > +static void program_trap_handler_settings(struct device_queue_manager > *dqm, > + struct qcm_process_device *qpd) > +{ > + return dqm->dev->kfd2kgd->program_trap_handler_settings( > + dqm->dev->kgd, qpd->vmid, > + qpd->tba_addr, qpd- > >tma_addr); > > [HK]: Since, this is not defined for all GFX generations, safer to > check if function pointer is not NULL. I see it is called only VEGA10+ > Sure I can add that. However, I am making sure that the function is defined for all ASICs after VEGA10+. Regards, Mukul > +} > + > static int allocate_vmid(struct device_queue_manager *dqm, > struct qcm_process_device *qpd, > struct queue *q) > @@ -241,6 +249,10 @@ static int allocate_vmid(struct > device_queue_manager *dqm, > > program_sh_mem_settings(dqm, qpd); > > + if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 && > + dqm->dev->cwsr_enabled) > + program_trap_handler_settings(dqm, qpd); > + > /* qpd->page_table_base is set earlier when register_process() > * is called, i.e. when the first queue is created. > */ > @@ -582,7 +594,9 @@ static int update_queue(struct > device_queue_manager *dqm, struct queue *q) > } > > retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, > - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, > + (dqm->dev->cwsr_enabled? > + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: > + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), > KFD_UNMAP_LATENCY_MS, q->pipe, q- > >queue); > if (retval) { > pr_err("destroy mqd failed\n"); > @@ -675,7 +689,9 @@ static int evict_process_queues_nocpsch(struct > device_queue_manager *dqm, > continue; > > retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, > - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, > + (dqm->dev->cwsr_enabled? > + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: > + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), > KFD_UNMAP_LATENCY_MS, q->pipe, q- > >queue); > if (retval && !ret) > /* Return the first error, but keep going to diff --git > a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > index 95c656d205ed..c84bd7b2cf59 100644 > --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > @@ -44,6 +44,7 @@ struct kgd_mem; > enum kfd_preempt_type { > KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0, > KFD_PREEMPT_TYPE_WAVEFRONT_RESET, > + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE > }; > > struct kfd_vm_fault_info { > @@ -298,6 +299,8 @@ struct kfd2kgd_calls { > > void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int > *wave_cnt, > int *max_waves_per_cu); > + void (*program_trap_handler_settings)(struct kgd_dev *kgd, > + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); > }; > > #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ > -- > 2.17.1