On Tue, Dec 12, 2017 at 2:16 PM, Andrey Grodzovsky <andrey.grodzovsky at amd.com> wrote: > Add new parameter to control GPU recovery procedure. > Retire old way of disabling GPU recovery by setting lockup_timeout == 0 and > set default for lockup_timeout to 10s. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++-- > 3 files changed, 12 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 3735500..26abe03 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -126,6 +126,7 @@ extern int amdgpu_param_buf_per_se; > extern int amdgpu_job_hang_limit; > extern int amdgpu_lbpw; > extern int amdgpu_compute_multipipe; > +extern int amdgpu_gpu_recovery; > > #ifdef CONFIG_DRM_AMDGPU_SI > extern int amdgpu_si_support; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 8d03baa..d84b57a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -3030,6 +3030,11 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) > return 0; > } > > + if (!amdgpu_gpu_recovery) { > + DRM_INFO("GPU recovery disabled.\n"); > + return 0; > + } Probably need an else here for the -1 (auto) case so we can disable it by default for non-SR-IOV and always keep it enabled for SR-IOV. 
> + > dev_info(adev->dev, "GPU reset begin!\n"); > > mutex_lock(&adev->lock_reset); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 0b039bd..5c612e9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -90,7 +90,7 @@ int amdgpu_disp_priority = 0; > int amdgpu_hw_i2c = 0; > int amdgpu_pcie_gen2 = -1; > int amdgpu_msi = -1; > -int amdgpu_lockup_timeout = 0; > +int amdgpu_lockup_timeout = 10000; > int amdgpu_dpm = -1; > int amdgpu_fw_load_type = -1; > int amdgpu_aspm = -1; > @@ -128,6 +128,7 @@ int amdgpu_param_buf_per_se = 0; > int amdgpu_job_hang_limit = 0; > int amdgpu_lbpw = -1; > int amdgpu_compute_multipipe = -1; > +int amdgpu_gpu_recovery = 1; > > MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); > module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); > @@ -165,7 +166,7 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444); > MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); > module_param_named(msi, amdgpu_msi, int, 0444); > > -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default 0 = disable)"); > +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default 10000)"); Make this a separate change. 
> module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); > > MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); > @@ -280,6 +281,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); > MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); > module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); > > +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable (default) , 0 = disable"); > +module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); > + Set the default to -1 so we can have different default behavior in different cases, e.g., different ASICs or environments (like SR-IOV). Alex > #ifdef CONFIG_DRM_AMDGPU_SI > > #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) > -- > 2.7.4 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx