[AMD Official Use Only - AMD Internal Distribution Only] > -----Original Message----- > From: Lazar, Lijo <Lijo.Lazar@xxxxxxx> > Sent: Monday, August 12, 2024 9:46 AM > To: Skvortsov, Victor <Victor.Skvortsov@xxxxxxx>; amd- > gfx@xxxxxxxxxxxxxxxxxxxxx; Luo, Zhigang <Zhigang.Luo@xxxxxxx> > Subject: Re: [PATCH v2] drm/amdgpu: Disable dpm_enabled flag while VF is in > reset > > > > On 8/12/2024 6:39 PM, Victor Skvortsov wrote: > > VFs do not perform HW fini/suspend in FLR, so the dpm_enabled is > > incorrectly kept enabled. Add interface to disable it in > > virt_pre_reset call. > > > > v2: Made implementation generic for all asics > > > > Signed-off-by: Victor Skvortsov <victor.skvortsov@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++---- > > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 ++++++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + > > drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + > > drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 5 ++++- > > 5 files changed, 16 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > index 29a4adee9286..a6b8d0ba4758 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > @@ -5289,10 +5289,8 @@ int amdgpu_device_pre_asic_reset(struct > amdgpu_device *adev, > > if (reset_context->reset_req_dev == adev) > > job = reset_context->job; > > > > - if (amdgpu_sriov_vf(adev)) { > > - /* stop the data exchange thread */ > > - amdgpu_virt_fini_data_exchange(adev); > > - } > > + if (amdgpu_sriov_vf(adev)) > > + amdgpu_virt_pre_reset(adev); > > > > amdgpu_fence_driver_isr_toggle(adev, true); > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > > index b287a82e6177..b6397d3229e1 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > > @@ -33,6 +33,7 @@ > > #include "amdgpu.h" > > #include "amdgpu_ras.h" > > #include "amdgpu_reset.h" > > +#include "amdgpu_dpm.h" > > #include "vi.h" > > #include "soc15.h" > > #include "nv.h" > > @@ -849,6 +850,13 @@ enum amdgpu_sriov_vf_mode > amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad > > return mode; > > } > > > > +void amdgpu_virt_pre_reset(struct amdgpu_device *adev) { > > + /* stop the data exchange thread */ > > + amdgpu_virt_fini_data_exchange(adev); > > + amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR); } > > + > > void amdgpu_virt_post_reset(struct amdgpu_device *adev) { > > if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) { > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > > index b42a8854dca0..b650a2032c42 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > > @@ -376,6 +376,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device > *adev, > > u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id); bool > > amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, > > uint32_t ucode_id); > > +void amdgpu_virt_pre_reset(struct amdgpu_device *adev); > > void amdgpu_virt_post_reset(struct amdgpu_device *adev); bool > > amdgpu_sriov_xnack_support(struct amdgpu_device *adev); bool > > amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, diff > > --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h > > b/drivers/gpu/drm/amd/include/kgd_pp_interface.h > > index 4b20e2274313..19a48d98830a 100644 > > --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h > > +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h > > @@ -218,6 +218,7 @@ enum pp_mp1_state { > > PP_MP1_STATE_SHUTDOWN, > > PP_MP1_STATE_UNLOAD, > > PP_MP1_STATE_RESET, > > + PP_MP1_STATE_FLR, > > }; > > > > enum pp_df_cstate { > > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > index 8b7d6ed7e2ed..af39206a2c5f 100644 > > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > @@ -168,7 +168,10 @@ int amdgpu_dpm_set_mp1_state(struct > amdgpu_device *adev, > > int ret = 0; > > const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > > > > - if (pp_funcs && pp_funcs->set_mp1_state) { > > + if (amdgpu_sriov_vf(adev) && mp1_state == PP_MP1_STATE_FLR) { > > + /* VF lost access to SMU */ > > + adev->pm.dpm_enabled = false; > > For non-VF devices, PP_MP1_STATE_FLR needs to be a don't care. > Preferrably, something like > > if (mp1_state == PP_MP1_STATE_FLR) { > if (amdgpu_sriov_vf(adev)) > adev->pm.dpm_enabled = false; > }else { .. > } > > Thanks, > Lijo I agree it's cleaner like that. Will send out v3. Thanks, Victor > > + } else if (pp_funcs && pp_funcs->set_mp1_state) { > > mutex_lock(&adev->pm.mutex); > > > > ret = pp_funcs->set_mp1_state(