[AMD Official Use Only - Internal Distribution Only] [AMD Official Use Only - Internal Distribution Only] -----Original Message----- From: Quan, Evan <Evan.Quan@xxxxxxx> Sent: Monday, July 13, 2020 11:30 AM To: Sheng, Wenhui <Wenhui.Sheng@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Gao, Likun <Likun.Gao@xxxxxxx>; Sheng, Wenhui <Wenhui.Sheng@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx> Subject: RE: [PATCH 3/3] drm/amdgpu: add module parameter choose reset mode [AMD Official Use Only - Internal Distribution Only] -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Wenhui Sheng Sent: Friday, July 10, 2020 10:17 PM To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Gao, Likun <Likun.Gao@xxxxxxx>; Sheng, Wenhui <Wenhui.Sheng@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx> Subject: [PATCH 3/3] drm/amdgpu: add module parameter choose reset mode Default value is auto, doesn't change original reset method logic. v2: change to use parameter reset_method Signed-off-by: Likun Gao <Likun.Gao@xxxxxxx> Signed-off-by: Wenhui Sheng <Wenhui.Sheng@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/cik.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/nv.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/vi.c | 4 ++++ 6 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4de93cef79b9..06bfb8658dec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -196,6 +196,7 @@ static const bool debug_evictions; /* = false */ #endif extern int amdgpu_tmz; +extern int amdgpu_reset_method; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 94c83a9d4987..ed9f32b89f36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -154,6 +154,7 @@ int amdgpu_mes = 0; int amdgpu_noretry = 1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = 0; +int amdgpu_reset_method = -1; /* auto */ struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -793,6 +794,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)"); module_param_named(tmz, amdgpu_tmz, int, 0444); +/** + * DOC: reset_method (int) + * SMU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = +mode1, 3 = mode2, 4 = baco) */ MODULE_PARM_DESC(reset_method, "SMU +reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 += mode2, 4 = baco)"); module_param_named(reset_method, +amdgpu_reset_method, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index fe306d0f73f7..a99418cd2f5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1326,6 +1326,10 @@ cik_asic_reset_method(struct amdgpu_device *adev) { bool baco_reset; +if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) return +amdgpu_reset_method; + switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 9f1240bd0310..309ff402d7be 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -311,6 +311,10 @@ nv_asic_reset_method(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu; +if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) return +amdgpu_reset_method; + [Quan, Evan] I kind of wonder what's the expected behavior if the user specifies amdgpu_reset_method 
as "0 = legacy" here? The user's setting seems to be silently ignored? [Wenhui, Sheng] Yes, if this ASIC doesn't support legacy, we should choose one automatically. Do you think this makes sense? Do we need to add more debug messages here? if (smu_baco_is_support(smu)) return AMD_RESET_METHOD_BACO; else diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8c739b285915..a8a134f7bb61 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -532,6 +532,11 @@ soc15_asic_reset_method(struct amdgpu_device *adev) bool baco_reset = false; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); +if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_MODE2 || +amdgpu_reset_method == AMD_RESET_METHOD_BACO) return +amdgpu_reset_method; + switch (adev->asic_type) { case CHIP_RAVEN: case CHIP_RENOIR: diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 4e5e91888d87..2cc8ad19ebed 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -710,6 +710,10 @@ vi_asic_reset_method(struct amdgpu_device *adev) { bool baco_reset; +if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) return +amdgpu_reset_method; + switch (adev->asic_type) { case CHIP_FIJI: case CHIP_TONGA: -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cevan.quan%40amd.com%7C259262f416404c23d64108d824dbf044%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637299874389349093&sdata=uyCH2W4ox82ta%2BZRO4PXeYhHf2o7RH3fMoawcsFy3M4%3D&reserved=0 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx