There are use cases which need full GPU functionality (e.g., VM update, TLB inavildate) when doing a GPU reset. Especially, the mes/umsch self tests which help validate the hw state after hw init like ring/ib tests. Add a post reset IP callback to handle such use cases which will be executed after GPU reset succeeds. Signed-off-by: Lang Yu <Lang.Yu@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 24 ++++++++++++++++++++++ drivers/gpu/drm/amd/include/amd_shared.h | 3 +++ 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 12dc71a6b5db..feeab9397aab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5556,6 +5556,27 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) return ret; } +static int amdgpu_device_ip_post_reset(struct amdgpu_device *adev) +{ + uint32_t i; + int r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid || + !adev->ip_blocks[i].version->funcs->post_reset) + continue; + + r = adev->ip_blocks[i].version->funcs->post_reset(adev); + if (r) { + DRM_ERROR("post reset of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + + return r; +} + /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -5805,6 +5826,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_put_xgmi_hive(hive); } + if (!r && !job_signaled) + r = amdgpu_device_ip_post_reset(adev); + if (r) dev_info(adev->dev, "GPU reset end with ret = %d\n", r); diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index b0a6256e89f4..33ce30a8e3ab 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -287,6 +287,7 @@ enum amd_dpm_forced_level; * @pre_soft_reset: pre soft reset the IP block * @soft_reset: soft reset the IP block * @post_soft_reset: post soft reset the IP block + * @post_reset: handles IP specific post reset stuff(e.g., self test) * @set_clockgating_state: enable/disable cg for the IP block * @set_powergating_state: enable/disable pg for the IP block * @get_clockgating_state: get current clockgating status @@ -316,11 +317,13 @@ struct amd_ip_funcs { int (*pre_soft_reset)(void *handle); int (*soft_reset)(void *handle); int (*post_soft_reset)(void *handle); + int (*post_reset)(void *handle); int (*set_clockgating_state)(void *handle, enum amd_clockgating_state state); int (*set_powergating_state)(void *handle, enum amd_powergating_state state); void (*get_clockgating_state)(void *handle, u64 *flags); + }; -- 2.25.1