RE: [PATCHv2 1/3] drm/amdgpu: Implement MES Suspend and Resume APIs for GFX11

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Kasiviswanathan, Harish <Harish.Kasiviswanathan@xxxxxxx>
> Sent: Thursday, August 15, 2024 4:32 PM
> To: Joshi, Mukul <Mukul.Joshi@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Deucher, Alexander
> <Alexander.Deucher@xxxxxxx>; Joshi, Mukul <Mukul.Joshi@xxxxxxx>
> Subject: RE: [PATCHv2 1/3] drm/amdgpu: Implement MES Suspend and
> Resume APIs for GFX11
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Mukul
> Joshi
> Sent: Wednesday, August 14, 2024 7:28 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Deucher, Alexander
> <Alexander.Deucher@xxxxxxx>; Joshi, Mukul <Mukul.Joshi@xxxxxxx>
> Subject: [PATCHv2 1/3] drm/amdgpu: Implement MES Suspend and Resume
> APIs for GFX11
>
> Add implementation for MES Suspend and Resume APIs to unmap/map all
> queues for GFX11. Support for GFX12 will be added when the corresponding
> firmware support is in place.
>
> Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx>
> ---
> v1->v2:
> - Add MES FW version check.
> - Update amdgpu_mes_suspend/amdgpu_mes_resume handling.
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 71 +++++++++++++--------
> ----  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  2 +
> drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 32 ++++++++++-
>  3 files changed, 69 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index c598c3edff7e..e551943da77a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -501,60 +501,50 @@ int amdgpu_mes_remove_gang(struct
> amdgpu_device *adev, int gang_id)
>
>  int amdgpu_mes_suspend(struct amdgpu_device *adev)  {
> -       struct idr *idp;
> -       struct amdgpu_mes_process *process;
> -       struct amdgpu_mes_gang *gang;
>         struct mes_suspend_gang_input input;
> -       int r, pasid;
> +       int r;
> +
> +       if (!amdgpu_mes_suspend_resume_all_supported(adev))
> +               return 0;
> +
> +       memset(&input, 0x0, sizeof(struct mes_suspend_gang_input));
> +       input.suspend_all_gangs = 1;
>
>         /*
>          * Avoid taking any other locks under MES lock to avoid circular
>          * lock dependencies.
>          */
>         amdgpu_mes_lock(&adev->mes);
> -
> -       idp = &adev->mes.pasid_idr;
> -
> -       idr_for_each_entry(idp, process, pasid) {
> -               list_for_each_entry(gang, &process->gang_list, list) {
> -                       r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
> -                       if (r)
> -                               DRM_ERROR("failed to suspend pasid %d gangid %d",
> -                                        pasid, gang->gang_id);
> -               }
> -       }
> -
> +       r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
>         amdgpu_mes_unlock(&adev->mes);
> -       return 0;
> +       if (r)
> +               DRM_ERROR("failed to suspend all gangs");
> +
> +       return r;
>  }
>
>  int amdgpu_mes_resume(struct amdgpu_device *adev)  {
> -       struct idr *idp;
> -       struct amdgpu_mes_process *process;
> -       struct amdgpu_mes_gang *gang;
>         struct mes_resume_gang_input input;
> -       int r, pasid;
> +       int r;
> +
> +       if (!amdgpu_mes_suspend_resume_all_supported(adev))
> +               return 0;
> +
> +       memset(&input, 0x0, sizeof(struct mes_resume_gang_input));
> +       input.resume_all_gangs = 1;
>
>         /*
>          * Avoid taking any other locks under MES lock to avoid circular
>          * lock dependencies.
>          */
>         amdgpu_mes_lock(&adev->mes);
> -
> -       idp = &adev->mes.pasid_idr;
> -
> -       idr_for_each_entry(idp, process, pasid) {
> -               list_for_each_entry(gang, &process->gang_list, list) {
> -                       r = adev->mes.funcs->resume_gang(&adev->mes, &input);
> -                       if (r)
> -                               DRM_ERROR("failed to resume pasid %d gangid %d",
> -                                        pasid, gang->gang_id);
> -               }
> -       }
> -
> +       r = adev->mes.funcs->resume_gang(&adev->mes, &input);
>         amdgpu_mes_unlock(&adev->mes);
> -       return 0;
> +       if (r)
> +               DRM_ERROR("failed to resume all gangs");
> +
> +       return r;
>  }
>
>  static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, @@ -
> 1608,6 +1598,19 @@ int amdgpu_mes_init_microcode(struct
> amdgpu_device *adev, int pipe)
>         return r;
>  }
>
> +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device
> +*adev) {
> +       uint32_t mes_rev = adev->mes.sched_version &
> AMDGPU_MES_VERSION_MASK;
> +       bool is_supported = false;
> +
> +       if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
> +           amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
> +           mes_rev >= 0x63)
> +               is_supported = true;
> +
> +       return is_supported;
> +}
> +
>  #if defined(CONFIG_DEBUG_FS)
>
>  static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void
> *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index 548e724e3a75..b2db62e50454 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -494,4 +494,6 @@ static inline void amdgpu_mes_unlock(struct
> amdgpu_mes *mes)
>         memalloc_noreclaim_restore(mes->saved_flags);
>         mutex_unlock(&mes->mutex_hidden);  }
> +
> +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device
> +*adev);
>  #endif /* __AMDGPU_MES_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index c0340ee3dec0..a5c582674db9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -421,13 +421,41 @@ static int mes_v11_0_unmap_legacy_queue(struct
> amdgpu_mes *mes,  static int mes_v11_0_suspend_gang(struct amdgpu_mes
> *mes,
>                                   struct mes_suspend_gang_input *input)  {
> -       return 0;
> +       union MESAPI__SUSPEND mes_suspend_gang_pkt;
> +
> +       memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt));
> +
> +       mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
> +       mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND;
> +       mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> +       mes_suspend_gang_pkt.suspend_all_gangs =
> + input->suspend_all_gangs;
>
> Only suspend_all_gangs and resume_all_gangs are set by the caller. None of
> the variables below are set by caller. Is that ok?

Yes that is fine. The callers of these suspend/resume functions will set the appropriate
values based on the need.
For our case, we need to suspend and resume all queues. Hence, the callers set suspend_all_gangs
and resume_all_gangs=1.

Thanks,
Mukul

>
> +       mes_suspend_gang_pkt.gang_context_addr = input-
> >gang_context_addr;
> +       mes_suspend_gang_pkt.suspend_fence_addr = input-
> >suspend_fence_addr;
> +       mes_suspend_gang_pkt.suspend_fence_value =
> + input->suspend_fence_value;
> +
> +       return mes_v11_0_submit_pkt_and_poll_completion(mes,
> +                       &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt),
> +                       offsetof(union MESAPI__SUSPEND, api_status));
>  }
>
>  static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
>                                  struct mes_resume_gang_input *input)  {
> -       return 0;
> +       union MESAPI__RESUME mes_resume_gang_pkt;
> +
> +       memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt));
> +
> +       mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
> +       mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME;
> +       mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> +       mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs;
> +       mes_resume_gang_pkt.gang_context_addr =
> + input->gang_context_addr;
> +
> +       return mes_v11_0_submit_pkt_and_poll_completion(mes,
> +                       &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt),
> +                       offsetof(union MESAPI__RESUME, api_status));
>  }
>
>  static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
> --
> 2.35.1
>





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux