There are two types of GPU reset: NPS mode switch and normal GPU reset. Add a flag to distinguish them. Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 13 ++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9365b43c0055..ba9b0d322b33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1681,6 +1681,7 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev) } int amdgpu_in_reset(struct amdgpu_device *adev); +int amdgpu_in_nps_switch(struct amdgpu_device *adev); extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 286f0fdfcb50..d69fcbb28b0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5862,7 +5862,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* We need to lock reset domain only once both for XGMI and single device */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); + amdgpu_device_lock_reset_domain(tmp_adev); /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { @@ -6343,7 +6343,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta * Locking adev->reset_domain->sem will prevent any external access * to GPU during PCI error recovery */ - amdgpu_device_lock_reset_domain(adev->reset_domain); + amdgpu_device_lock_reset_domain(adev); 
amdgpu_device_set_mp1_state(adev); /* @@ -6579,6 +6579,11 @@ int amdgpu_in_reset(struct amdgpu_device *adev) return atomic_read(&adev->reset_domain->in_gpu_reset); } +int amdgpu_in_nps_switch(struct amdgpu_device *adev) +{ + return atomic_read(&adev->reset_domain->in_nps_switch); +} + /** * amdgpu_device_halt() - bring hardware to some kind of halt state * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index f4c08fa83756..1becf8fbbc71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -301,15 +301,25 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d INIT_WORK(&reset_domain->clear, amdgpu_reset_domain_cancel_all_work); atomic_set(&reset_domain->in_gpu_reset, 0); + atomic_set(&reset_domain->in_nps_switch, 0); atomic_set(&reset_domain->reset_res, 0); init_rwsem(&reset_domain->sem); return reset_domain; } -void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain) +void amdgpu_device_lock_reset_domain(struct amdgpu_device *adev) { + struct amdgpu_reset_domain *reset_domain = adev->reset_domain; + atomic_set(&reset_domain->in_gpu_reset, 1); + /* The life time of in_nps_switch is longer than + * amdgpu_device_nps_switch_needed + */ + if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested && + adev->nbio.funcs->is_nps_switch_requested(adev)) + atomic_set(&reset_domain->in_nps_switch, 1); + down_write(&reset_domain->sem); } @@ -317,6 +327,7 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain) void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) { atomic_set(&reset_domain->in_gpu_reset, 0); + atomic_set(&reset_domain->in_nps_switch, 0); up_write(&reset_domain->sem); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 977b2dd2205a..c74a1f88f0ef 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -97,6 +97,7 @@ struct amdgpu_reset_domain { enum amdgpu_reset_domain_type type; struct rw_semaphore sem; atomic_t in_gpu_reset; + atomic_t in_nps_switch; atomic_t reset_res; struct work_struct clear; bool drain; @@ -158,7 +159,7 @@ static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) return rwsem_is_contended(&domain->sem); } -void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); +void amdgpu_device_lock_reset_domain(struct amdgpu_device *adev); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index daa69dfb4dca..8387e075c385 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1540,7 +1540,7 @@ static void amdgpu_xgmi_reset_on_init_work(struct work_struct *work) tmp_adev = list_first_entry(&device_list, struct amdgpu_device, reset_list); - amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); + amdgpu_device_lock_reset_domain(tmp_adev); reset_context.method = AMD_RESET_METHOD_ON_INIT; reset_context.reset_req_dev = tmp_adev; -- 2.34.1