On 12/13/2017 07:20 AM, Christian König wrote: > Am 12.12.2017 um 20:16 schrieb Andrey Grodzovsky: >> Add new parameter to control GPU recovery procedure. >> Retire old way of disabling GPU recovery by setting lockup_timeout == >> 0 and >> set default for lockup_timeout to 10s. >> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> >> --- >>  drivers/gpu/drm/amd/amdgpu/amdgpu.h       | 1 + >>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ >>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 8 ++++++-- >>  3 files changed, 12 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 3735500..26abe03 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -126,6 +126,7 @@ extern int amdgpu_param_buf_per_se; >>  extern int amdgpu_job_hang_limit; >>  extern int amdgpu_lbpw; >>  extern int amdgpu_compute_multipipe; >> +extern int amdgpu_gpu_recovery; >>   #ifdef CONFIG_DRM_AMDGPU_SI >>  extern int amdgpu_si_support; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> index 8d03baa..d84b57a 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> @@ -3030,6 +3030,11 @@ int amdgpu_gpu_recover(struct amdgpu_device >> *adev, struct amdgpu_job *job) >>          return 0; >>      } >>  +   if (!amdgpu_gpu_recovery) { >> +       DRM_INFO("GPU recovery disabled.\n"); >> +       return 0; >> +   } >> + > > Please move this check into the caller of amdgpu_gpu_recover(). > > This way we can still trigger a GPU recovery manually or from the > hypervisor under SRIOV. > > Christian. Problem with this is that amdgpu_check_soft_reset will not be called, this function which prints which IP block was hung even when later we opt not to recover. I suggest instead to add a bool force_reset parameter to amdgpu_gpu_recover which will override amdgpu_gpu_recovery and we can set it to true from amdgpu_debugfs_gpu_recover only. Thanks, Andrey > >>      dev_info(adev->dev, "GPU reset begin!\n"); >>       mutex_lock(&adev->lock_reset); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> index 0b039bd..5c612e9 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> @@ -90,7 +90,7 @@ int amdgpu_disp_priority = 0; >>  int amdgpu_hw_i2c = 0; >>  int amdgpu_pcie_gen2 = -1; >>  int amdgpu_msi = -1; >> -int amdgpu_lockup_timeout = 0; >> +int amdgpu_lockup_timeout = 10000; >>  int amdgpu_dpm = -1; >>  int amdgpu_fw_load_type = -1; >>  int amdgpu_aspm = -1; >> @@ -128,6 +128,7 @@ int amdgpu_param_buf_per_se = 0; >>  int amdgpu_job_hang_limit = 0; >>  int amdgpu_lbpw = -1; >>  int amdgpu_compute_multipipe = -1; >> +int amdgpu_gpu_recovery = 1; >>   MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in >> megabytes"); >>  module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); >> @@ -165,7 +166,7 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, >> int, 0444); >>  MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = >> auto)"); >>  module_param_named(msi, amdgpu_msi, int, 0444); >>  -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms >> (default 0 = disable)"); >> +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default >> 10000)"); >>  module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); >>   MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = >> auto)"); >> @@ -280,6 +281,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); >>  MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be >> spread across pipes (1 = enable, 0 = disable, -1 = auto)"); >>  module_param_named(compute_multipipe, amdgpu_compute_multipipe, >> int, 0444); >>  +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 >> = enable (default) , 0 = disable"); >> +module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); >> + >>  #ifdef CONFIG_DRM_AMDGPU_SI >>   #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx