Andrey You patch looks breaks the logic for SRIOV, please check function "xgpu_ai_mailbox_flr_work" This function manually triggers GPU_RECOVER by the will of hypervisor. Your check of : + if (!amdgpu_gpu_recovery) { + DRM_INFO("GPU recovery disabled.\n"); + return 0; + } Actually breaks the SRIOV logic I have two idea: 1) Please change to : If (!amdgpu_gpu_recover && !amdgpu_sriov_vf(adev)) 2) please add another parameter "force_gpu_recover", and set it to true in driver init stage, and In your check, you can change to: if(!amdgpu_gpu_recovery && !force_gpu_recover) BR Monk -----Original Message----- From: amd-gfx [mailto:amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx] On Behalf Of Andrey Grodzovsky Sent: 2017å¹´12æ??13æ?¥ 3:16 To: amd-gfx at lists.freedesktop.org; Koenig, Christian <Christian.Koenig at amd.com> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky at amd.com>; Liu, Monk <Monk.Liu at amd.com>; maraeo at gmail.com Subject: [PATCH] drm/amdgpu: Add gpu_recovery parameter Add new parameter to control GPU recovery procedure. Retire old way of disabling GPU recovery by setting lockup_timeout == 0 and set default for lockup_timeout to 10s. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3735500..26abe03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -126,6 +126,7 @@ extern int amdgpu_param_buf_per_se; extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; +extern int amdgpu_gpu_recovery; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8d03baa..d84b57a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3030,6 +3030,11 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) return 0; } + if (!amdgpu_gpu_recovery) { + DRM_INFO("GPU recovery disabled.\n"); + return 0; + } + dev_info(adev->dev, "GPU reset begin!\n"); mutex_lock(&adev->lock_reset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0b039bd..5c612e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -90,7 +90,7 @@ int amdgpu_disp_priority = 0; int amdgpu_hw_i2c = 0; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; -int amdgpu_lockup_timeout = 0; +int amdgpu_lockup_timeout = 10000; int amdgpu_dpm = -1; int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; @@ -128,6 +128,7 @@ int amdgpu_param_buf_per_se = 0; int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; +int amdgpu_gpu_recovery = 1; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -165,7 +166,7 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444); MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default 0 = disable)"); +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default +10000)"); module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); @@ -280,6 +281,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = +enable (default) , 0 = disable"); module_param_named(gpu_recovery, +amdgpu_gpu_recovery, int, 0444); + #ifdef CONFIG_DRM_AMDGPU_SI #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx at lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx