[AMD Official Use Only - AMD Internal Distribution Only] > -----Original Message----- > From: Kasiviswanathan, Harish <Harish.Kasiviswanathan@xxxxxxx> > Sent: Thursday, August 15, 2024 4:32 PM > To: Joshi, Mukul <Mukul.Joshi@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Deucher, Alexander > <Alexander.Deucher@xxxxxxx>; Joshi, Mukul <Mukul.Joshi@xxxxxxx> > Subject: RE: [PATCHv2 1/3] drm/amdgpu: Implement MES Suspend and > Resume APIs for GFX11 > > [AMD Official Use Only - AMD Internal Distribution Only] > > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Mukul > Joshi > Sent: Wednesday, August 14, 2024 7:28 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Deucher, Alexander > <Alexander.Deucher@xxxxxxx>; Joshi, Mukul <Mukul.Joshi@xxxxxxx> > Subject: [PATCHv2 1/3] drm/amdgpu: Implement MES Suspend and Resume > APIs for GFX11 > > Add implementation for MES Suspend and Resume APIs to unmap/map all > queues for GFX11. Support for GFX12 will be added when the corresponding > firmware support is in place. > > Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> > --- > v1->v2: > - Add MES FW version check. > - Update amdgpu_mes_suspend/amdgpu_mes_resume handling. > > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 71 +++++++++++++-------- > ---- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 2 + > drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 32 ++++++++++- > 3 files changed, 69 insertions(+), 36 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > index c598c3edff7e..e551943da77a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > @@ -501,60 +501,50 @@ int amdgpu_mes_remove_gang(struct > amdgpu_device *adev, int gang_id) > > int amdgpu_mes_suspend(struct amdgpu_device *adev) { > - struct idr *idp; > - struct amdgpu_mes_process *process; > - struct amdgpu_mes_gang *gang; > struct mes_suspend_gang_input input; > - int r, pasid; > + int r; > + > + if (!amdgpu_mes_suspend_resume_all_supported(adev)) > + return 0; > + > + memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); > + input.suspend_all_gangs = 1; > > /* > * Avoid taking any other locks under MES lock to avoid circular > * lock dependencies. > */ > amdgpu_mes_lock(&adev->mes); > - > - idp = &adev->mes.pasid_idr; > - > - idr_for_each_entry(idp, process, pasid) { > - list_for_each_entry(gang, &process->gang_list, list) { > - r = adev->mes.funcs->suspend_gang(&adev->mes, &input); > - if (r) > - DRM_ERROR("failed to suspend pasid %d gangid %d", > - pasid, gang->gang_id); > - } > - } > - > + r = adev->mes.funcs->suspend_gang(&adev->mes, &input); > amdgpu_mes_unlock(&adev->mes); > - return 0; > + if (r) > + DRM_ERROR("failed to suspend all gangs"); > + > + return r; > } > > int amdgpu_mes_resume(struct amdgpu_device *adev) { > - struct idr *idp; > - struct amdgpu_mes_process *process; > - struct amdgpu_mes_gang *gang; > struct mes_resume_gang_input input; > - int r, pasid; > + int r; > + > + if (!amdgpu_mes_suspend_resume_all_supported(adev)) > + return 0; > + > + memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); > + input.resume_all_gangs = 1; > > /* > * Avoid taking any other locks under MES lock to avoid circular > * lock dependencies. > */ > amdgpu_mes_lock(&adev->mes); > - > - idp = &adev->mes.pasid_idr; > - > - idr_for_each_entry(idp, process, pasid) { > - list_for_each_entry(gang, &process->gang_list, list) { > - r = adev->mes.funcs->resume_gang(&adev->mes, &input); > - if (r) > - DRM_ERROR("failed to resume pasid %d gangid %d", > - pasid, gang->gang_id); > - } > - } > - > + r = adev->mes.funcs->resume_gang(&adev->mes, &input); > amdgpu_mes_unlock(&adev->mes); > - return 0; > + if (r) > + DRM_ERROR("failed to resume all gangs"); > + > + return r; > } > > static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, @@ - > 1608,6 +1598,19 @@ int amdgpu_mes_init_microcode(struct > amdgpu_device *adev, int pipe) > return r; > } > > +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device > +*adev) { > + uint32_t mes_rev = adev->mes.sched_version & > AMDGPU_MES_VERSION_MASK; > + bool is_supported = false; > + > + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && > + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && > + mes_rev >= 0x63) > + is_supported = true; > + > + return is_supported; > +} > + > #if defined(CONFIG_DEBUG_FS) > > static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void > *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > index 548e724e3a75..b2db62e50454 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > @@ -494,4 +494,6 @@ static inline void amdgpu_mes_unlock(struct > amdgpu_mes *mes) > memalloc_noreclaim_restore(mes->saved_flags); > mutex_unlock(&mes->mutex_hidden); } > + > +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device > +*adev); > #endif /* __AMDGPU_MES_H__ */ > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > index c0340ee3dec0..a5c582674db9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > @@ -421,13 +421,41 @@ static int mes_v11_0_unmap_legacy_queue(struct > amdgpu_mes *mes, static int mes_v11_0_suspend_gang(struct amdgpu_mes > *mes, > struct mes_suspend_gang_input *input) { > - return 0; > + union MESAPI__SUSPEND mes_suspend_gang_pkt; > + > + memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); > + > + mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; > + mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; > + mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; > + > + mes_suspend_gang_pkt.suspend_all_gangs = > + input->suspend_all_gangs; > > Only suspend_all_gangs and resume_all_gangs are set by the caller. None of > the variables below are set by caller. Is that ok? Yes that is fine. The callers of these suspend/resume functions will set the appropriate values based on the need. For our case, we need to suspend and resume all queues. Hence, the callers set suspend_all_gangs and resume_all_gangs=1. Thanks, Mukul > > + mes_suspend_gang_pkt.gang_context_addr = input- > >gang_context_addr; > + mes_suspend_gang_pkt.suspend_fence_addr = input- > >suspend_fence_addr; > + mes_suspend_gang_pkt.suspend_fence_value = > + input->suspend_fence_value; > + > + return mes_v11_0_submit_pkt_and_poll_completion(mes, > + &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), > + offsetof(union MESAPI__SUSPEND, api_status)); > } > > static int mes_v11_0_resume_gang(struct amdgpu_mes *mes, > struct mes_resume_gang_input *input) { > - return 0; > + union MESAPI__RESUME mes_resume_gang_pkt; > + > + memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); > + > + mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; > + mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; > + mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; > + > + mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; > + mes_resume_gang_pkt.gang_context_addr = > + input->gang_context_addr; > + > + return mes_v11_0_submit_pkt_and_poll_completion(mes, > + &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), > + offsetof(union MESAPI__RESUME, api_status)); > } > > static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) > -- > 2.35.1 >