Re: [PATCH v3 2/2] drm/amdgpu: Add ring reset callback for JPEG4_0_3

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2/18/2025 8:36 AM, Sathishkumar S wrote:
> Add ring reset function callback for JPEG4_0_3 to
> recover from job timeouts without a full gpu reset.
> 
> V2:
>  - sched->ready flag shouldn't be modified by HW backend (Christian)
> 
> V3:
>  - Don't modify sched/job-submission state from HW backend (Christian)
>  - Implement per-core reset sequence
> 
> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@xxxxxxx>
> Acked-by: Christian König <christian.koenig@xxxxxxx>
> Reviewed-by: Leo Liu <leo.liu@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 49 +++++++++++++++++++++---
>  1 file changed, 43 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> index c67ba961de91..f10231c22c15 100644
> --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> @@ -204,14 +204,10 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
>  	if (r)
>  		return r;
>  
> -	/* TODO: Add queue reset mask when FW fully supports it */
> -	adev->jpeg.supported_reset =
> -		amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
> +	adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
>  	r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
> -	if (r)
> -		return r;
>  
> -	return 0;
> +	return r;
>  }
>  
>  /**
> @@ -231,6 +227,7 @@ static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
>  		return r;
>  
>  	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
> +
>  	r = amdgpu_jpeg_sw_fini(adev);
>  
>  	return r;
> @@ -1099,6 +1096,45 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
>  	return 0;
>  }
>  
> +static void jpeg_v4_0_3_core_stall_reset(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	int jpeg_inst = GET_INST(JPEG, ring->me);
> +	int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe);
> +
> +	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
> +			    regUVD_JMI0_UVD_JMI_CLIENT_STALL,
> +			    reg_offset, 0x1F);
> +	SOC15_WAIT_ON_RREG(JPEG, jpeg_inst,
> +			   regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
> +			   0x1F, 0x1f);
> +	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
> +			    regUVD_JMI0_JPEG_LMI_DROP,
> +			    reg_offset, 0x1F);
> +	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
> +			    regJPEG_CORE_RST_CTRL,
> +			    reg_offset, 1 << ring->pipe);
> +	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
> +			    regUVD_JMI0_UVD_JMI_CLIENT_STALL,
> +			    reg_offset, 0x00);
> +	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
> +			    regUVD_JMI0_JPEG_LMI_DROP,
> +			    reg_offset, 0x00);
> +	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
> +			    regJPEG_CORE_RST_CTRL,
> +			    reg_offset, 0x00);
> +}
> +
> +static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
> +{
> +	if (amdgpu_sriov_vf(ring->adev))
> +		return -EINVAL;

-EOPNOTSUPP would be more appropriate here. Since ring reset is not
supported on VFs, the same check could also be applied when initializing
the adev->jpeg.supported_reset mask, so that the sysfs options are not
created for VFs.

Thanks,
Lijo

> +
> +	jpeg_v4_0_3_core_stall_reset(ring);
> +	jpeg_v4_0_3_start_jrbc(ring);
> +	return amdgpu_ring_test_helper(ring);
> +}
> +
>  static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
>  	.name = "jpeg_v4_0_3",
>  	.early_init = jpeg_v4_0_3_early_init,
> @@ -1145,6 +1181,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
>  	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
>  	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
>  	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
> +	.reset = jpeg_v4_0_3_ring_reset,
>  };
>  
>  static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux