[AMD Official Use Only - AMD Internal Distribution Only] Reviewed-by: Tao Zhou <tao.zhou1@xxxxxxx> > -----Original Message----- > From: Hawking Zhang <Hawking.Zhang@xxxxxxx> > Sent: Friday, September 6, 2024 4:13 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhou1, Tao <Tao.Zhou1@xxxxxxx> > Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx> > Subject: [PATCH] drm/amdkfd: Select reset method for poison handling > > Driver mode-2 is only supported by relatively new smc firmware. > > Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> > --- > .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 40 +++++++++++++++---- > 1 file changed, 32 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c > b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c > index fecdbbab9894..d46a13156ee9 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c > @@ -167,11 +167,23 @@ static void > event_interrupt_poison_consumption_v9(struct kfd_node *dev, > case SOC15_IH_CLIENTID_SE3SH: > case SOC15_IH_CLIENTID_UTCL2: > block = AMDGPU_RAS_BLOCK__GFX; > - if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == > IP_VERSION(9, 4, 3) || > - amdgpu_ip_version(dev->adev, GC_HWIP, 0) == > IP_VERSION(9, 4, 4)) > - reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; > - else > + if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == > IP_VERSION(9, 4, 3)) { > + /* driver mode-2 for gfx poison is only supported by > + * pmfw 0x00557300 and onwards */ > + if (dev->adev->pm.fw_version < 0x00557300) > + reset = > AMDGPU_RAS_GPU_RESET_MODE1_RESET; > + else > + reset = > AMDGPU_RAS_GPU_RESET_MODE2_RESET; > + } else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == > IP_VERSION(9, 4, 4)) { > + /* driver mode-2 for gfx poison is only supported by > + * pmfw 0x05550C00 and onwards */ > + if (dev->adev->pm.fw_version < 0x05550C00) > + reset = > AMDGPU_RAS_GPU_RESET_MODE1_RESET; > + else > + reset = > AMDGPU_RAS_GPU_RESET_MODE2_RESET; > + } else { > reset = 
AMDGPU_RAS_GPU_RESET_MODE2_RESET; > + } > break; > case SOC15_IH_CLIENTID_VMC: > case SOC15_IH_CLIENTID_VMC1: > @@ -184,11 +196,23 @@ static void > event_interrupt_poison_consumption_v9(struct kfd_node *dev, > case SOC15_IH_CLIENTID_SDMA3: > case SOC15_IH_CLIENTID_SDMA4: > block = AMDGPU_RAS_BLOCK__SDMA; > - if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == > IP_VERSION(9, 4, 3) || > - amdgpu_ip_version(dev->adev, GC_HWIP, 0) == > IP_VERSION(9, 4, 4)) > - reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; > - else > + if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == > IP_VERSION(4, 4, 2)) { > + /* driver mode-2 for gfx poison is only supported by > + * pmfw 0x00557300 and onwards */ > + if (dev->adev->pm.fw_version < 0x00557300) > + reset = > AMDGPU_RAS_GPU_RESET_MODE1_RESET; > + else > + reset = > AMDGPU_RAS_GPU_RESET_MODE2_RESET; > + } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == > IP_VERSION(4, 4, 5)) { > + /* driver mode-2 for gfx poison is only supported by > + * pmfw 0x05550C00 and onwards */ > + if (dev->adev->pm.fw_version < 0x05550C00) > + reset = > AMDGPU_RAS_GPU_RESET_MODE1_RESET; > + else > + reset = > AMDGPU_RAS_GPU_RESET_MODE2_RESET; > + } else { > reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; > + } > break; > default: > dev_warn(dev->adev->dev, > -- > 2.17.1