From: Xiaogang Chen <xiaogang.chen@xxxxxxx> While KFD is restoring evicted userptr BOs, the MMU invalidate callback may invalidate the same userptr BOs that have just been restored. When the KFD restore process detects this, it reschedules another validation pass. This is not an error. Change the WARNs to pr_debug, do not put such BOs on userptr_valid_list, and let the next scheduled delayed work validate them again. Signed-off-by: Xiaogang Chen <Xiaogang.Chen@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7b1f5933ebaa..d0c224703278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2581,11 +2581,18 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i mem->range = NULL; if (!valid) { - WARN(!mem->invalid, "Invalid BO not marked invalid"); + if (!mem->invalid) + pr_debug("Invalid BO not marked invalid\n"); + + ret = -EAGAIN; + continue; + } + + if (mem->invalid) { + pr_debug("Valid BO is marked invalid\n"); ret = -EAGAIN; continue; } - WARN(mem->invalid, "Valid BO is marked invalid"); list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); @@ -2648,7 +2655,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) goto unlock_notifier_out; if (confirm_valid_user_pages_locked(process_info)) { - WARN(1, "User pages unexpectedly invalid"); + pr_debug("User pages unexpectedly invalid, reschedule another attempt\n"); goto unlock_notifier_out; } -- 2.25.1