On 2/21/2025 4:15 PM, jesse.zhang@xxxxxxx wrote:
> From: "Jesse.zhang@xxxxxxx" <jesse.zhang@xxxxxxx>
>
> - Added an `sdma_v4_4_2_update_reset_mask` function to update the reset mask.
> - Moved the sysfs reset mask update to the `late_init` stage to ensure that
>   the SMU initialization and capability setup are completed before checking
>   the SDMA reset capability.
> - For IP versions 9.4.3 and 9.4.4, enabled per-queue reset if the MEC
>   firmware version is at least 0xb0 and the PMFW supports queue reset.
> - Added a TODO comment for future support of per-queue reset on IP version 9.4.5.
>
> This change ensures that per-queue reset is only enabled when the MEC and
> PMFW support it.
>
> Suggested-by: Jonathan Kim <Jonathan.Kim@xxxxxxx>
> Signed-off-by: Vitaly Prosyak <vitaly.prosyak@xxxxxxx>
> Signed-off-by: Jesse Zhang <jesse.zhang@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 37 +++++++++++++++++++++++-
>  1 file changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index 4fa688e00f5e..17e7e36f4477 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -107,6 +107,7 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
>  static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
>  static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
>  static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev);
> +static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
>
>  static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
>  		u32 instance, u32 offset)
> @@ -1374,6 +1375,12 @@ static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
>  	if (!amdgpu_persistent_edc_harvesting_supported(adev))
>  		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
>
> +	/* The update is done in the late_init stage to ensure that the SMU
> +	 * initialization and capability setup are completed before we check
> +	 * the SDMA reset capability.
> +	 */
> +	sdma_v4_4_2_update_reset_mask(adev);
> +
>  	return 0;
>  }
>
> @@ -1481,7 +1488,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
>  		}
>  	}
>
> -	/* TODO: Add queue reset mask when FW fully supports it */
>  	adev->sdma.supported_reset =
>  		amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
>
> @@ -2328,6 +2334,35 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
>  	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
>  }
>
> +/**
> + * sdma_v4_4_2_update_reset_mask - update reset mask for SDMA
> + * @adev: Pointer to the AMDGPU device structure
> + *
> + * This function updates the reset mask for SDMA and sets the supported
> + * reset types based on the IP version and firmware versions.
> + *
> + */
> +static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
> +{
> +
> +	/*
> +	 * Resetting an SDMA user queue relies on both the MEC FW and the PMFW,
> +	 * so check both of them here to skip old MEC and PMFW versions.
> +	 */
> +	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> +	case IP_VERSION(9, 4, 3):
> +	case IP_VERSION(9, 4, 4):
> +		if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
> +			adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> +		break;
> +	case IP_VERSION(9, 4, 5):

Did you mean v9.5.0?

Thanks,
Lijo

> +		/* TODO: enable the queue reset flag once the FW supports it */
> +	default:
> +		break;
> +	}
> +
> +}
> +
>  const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
>  	.type = AMD_IP_BLOCK_TYPE_SDMA,
>  	.major = 4,
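
For readers without the amdgpu tree at hand, here is a minimal standalone sketch
of the gating pattern the patch applies: advertise per-queue reset only when the
GC IP version, the MEC firmware version, and the PMFW capability all check out.
The names used here (fake_adev, update_reset_mask, the RESET_TYPE_* bits and the
IP_VER macro) are simplified placeholders, not the real driver definitions.

    /*
     * Simplified stand-ins for the amdgpu structures and helpers used in the
     * patch; these are illustrative placeholders only.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RESET_TYPE_SOFT_RESET  (1u << 0)
    #define RESET_TYPE_PER_QUEUE   (1u << 1)

    #define IP_VER(major, minor, rev)  (((major) << 16) | ((minor) << 8) | (rev))

    struct fake_adev {
    	uint32_t gc_ip_version;   /* stands in for amdgpu_ip_version(adev, GC_HWIP, 0) */
    	uint32_t mec_fw_version;  /* stands in for adev->gfx.mec_fw_version */
    	bool pmfw_sdma_reset;     /* stands in for amdgpu_dpm_reset_sdma_is_supported() */
    	uint32_t supported_reset; /* stands in for adev->sdma.supported_reset */
    };

    /* Mirrors the shape of sdma_v4_4_2_update_reset_mask() from the patch. */
    static void update_reset_mask(struct fake_adev *adev)
    {
    	switch (adev->gc_ip_version) {
    	case IP_VER(9, 4, 3):
    	case IP_VER(9, 4, 4):
    		/* Per-queue reset needs both a new enough MEC FW and PMFW support. */
    		if (adev->mec_fw_version >= 0xb0 && adev->pmfw_sdma_reset)
    			adev->supported_reset |= RESET_TYPE_PER_QUEUE;
    		break;
    	default:
    		/* Other IP versions keep only what sw_init already set. */
    		break;
    	}
    }

    int main(void)
    {
    	struct fake_adev adev = {
    		.gc_ip_version = IP_VER(9, 4, 3),
    		.mec_fw_version = 0xb0,
    		.pmfw_sdma_reset = true,
    		.supported_reset = RESET_TYPE_SOFT_RESET,
    	};

    	update_reset_mask(&adev);
    	printf("supported_reset = 0x%x (per-queue %s)\n", adev.supported_reset,
    	       (adev.supported_reset & RESET_TYPE_PER_QUEUE) ? "enabled" : "disabled");
    	return 0;
    }

Keeping this check out of sw_init reflects the point of the patch: the PMFW
capability is only known once SMU initialization has completed, so the mask can
only be finalized in late_init.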