Preparing for asynchronous processing of umc page retirement. Signed-off-by: YiPeng Chai <YiPeng.Chai@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 34 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5 ++++ 2 files changed, 39 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a988360ce3e2..856206e95842 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2656,6 +2656,25 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, } } +static int amdgpu_ras_page_retirement_thread(void *param) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)param; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + while (!kthread_should_stop()) { + + wait_event_interruptible(con->page_retirement_wq, + atomic_read(&con->page_retirement_req_cnt)); + + dev_info(adev->dev, "Start processing page retirement. request:%d\n", + atomic_read(&con->page_retirement_req_cnt)); + + atomic_dec(&con->page_retirement_req_cnt); + } + + return 0; +} + int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -2719,6 +2738,16 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) } } + mutex_init(&con->page_retirement_lock); + init_waitqueue_head(&con->page_retirement_wq); + atomic_set(&con->page_retirement_req_cnt, 0); + con->page_retirement_thread = + kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement"); + if (IS_ERR(con->page_retirement_thread)) { + con->page_retirement_thread = NULL; + dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n"); + } + #ifdef CONFIG_X86_MCE_AMD #ifdef HAVE_SMCA_UMC_V2 if ((adev->asic_type == CHIP_ALDEBARAN) && @@ -2757,6 +2786,11 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) if (!data) return 0; + if (con->page_retirement_thread) + kthread_stop(con->page_retirement_thread); + + atomic_set(&con->page_retirement_req_cnt, 0); + cancel_work_sync(&con->recovery_work); mutex_lock(&con->recovery_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 2cd89328dc73..9c3df9985fad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -461,6 +461,11 @@ struct amdgpu_ras { /* Record special requirements of gpu reset caller */ uint32_t gpu_reset_flags; + + struct task_struct *page_retirement_thread; + wait_queue_head_t page_retirement_wq; + struct mutex page_retirement_lock; + atomic_t page_retirement_req_cnt; }; struct ras_fs_data { -- 2.34.1