Re: [PATCH 2/4] drm/amdgpu/sdma: Refactor SDMA reset functionality and add callback support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2/10/2025 1:01 PM, Jesse.zhang@xxxxxxx wrote:
> From: "Jesse.zhang@xxxxxxx" <Jesse.zhang@xxxxxxx>
> 
> This patch refactors the SDMA reset functionality in the `sdma_v4_4_2` driver
> to improve modularity and support shared usage between AMDGPU and KFD. The
> changes include:
> 
> 1. **Refactored SDMA Reset Logic**:
>    - Split the `sdma_v4_4_2_reset_queue` function into two separate functions:
>      - `sdma_v4_4_2_stop_queue`: Stops the SDMA queue before reset.
>      - `sdma_v4_4_2_restore_queue`: Restores the SDMA queue after reset.
>    - These functions are now used as callbacks for the shared reset mechanism.
> 
> 2. **Added Callback Support**:
>    - Introduced a new structure `sdma_v4_4_2_reset_funcs` to hold the stop and
>      restore callbacks.
>    - Added `sdma_v4_4_2_set_reset_funcs` to register these callbacks with the
>      shared reset mechanism using `amdgpu_set_on_reset_callbacks`.
> 
> 3. **Fixed Reset Queue Function**:
>    - Modified `sdma_v4_4_2_reset_queue` to use the shared `amdgpu_sdma_reset_queue`
>      function, ensuring consistency across the driver.
> 
> This patch ensures that SDMA reset functionality is more modular, reusable, and
> aligned with the shared reset mechanism between AMDGPU and KFD.
> 
> Suggested-by: Jiadong Zhu <Jiadong.Zhu@xxxxxxx>
> Signed-off-by: Jesse Zhang <jesse.zhang@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 32 +++++++++++++++++++++---
>  1 file changed, 28 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index 64c163dd708f..3e60456b0db0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -105,6 +105,7 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
>  static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
>  static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
>  static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
> +static void sdma_v4_4_2_set_reset_funcs(struct amdgpu_device *adev);
>  
>  static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
>  		u32 instance, u32 offset)
> @@ -1330,6 +1331,7 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
>  	sdma_v4_4_2_set_vm_pte_funcs(adev);
>  	sdma_v4_4_2_set_irq_funcs(adev);
>  	sdma_v4_4_2_set_ras_funcs(adev);
> +	sdma_v4_4_2_set_reset_funcs(adev);
>  
>  	return 0;
>  }
> @@ -1605,8 +1607,14 @@ static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block)
>  static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> -	int i, r;
> +	u32 id = GET_INST(SDMA0, ring->me);
> +	return amdgpu_sdma_reset_instance(adev, id);

Looks like it may be better to pass the amdgpu_ring pointer as the
argument here as well as for pre/post reset calls.

> +}
> +
> +static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id)
> +{
>  	u32 inst_mask;
> +	struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring;
>  
>  	if (amdgpu_sriov_vf(adev))
>  		return -EINVAL;
> @@ -1617,10 +1625,16 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
>  	if (adev->sdma.has_page_queue)
>  		sdma_v4_4_2_inst_page_stop(adev, inst_mask);
>  
> -	r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, ring->me));
> -	if (r)
> -		return r;
> +	return 0;
> +}
>  
> +static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instance_id)
> +{
> +	int i;
> +	u32 inst_mask;
> +	struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring;
> +
> +	inst_mask = 1 << ring->me;
>  	udelay(50);
>  
>  	for (i = 0; i < adev->usec_timeout; i++) {
> @@ -1638,6 +1652,16 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
>  	return sdma_v4_4_2_inst_start(adev, inst_mask, true);
>  }
>  
> +static struct sdma_on_reset_funcs sdma_v4_4_2_reset_funcs = {
> +	.pre_reset = sdma_v4_4_2_stop_queue,

It's better to call these as prereset/postreset. That makes it a bit
easier to know when the function is used.

Thanks,
Lijo

> +	.post_reset = sdma_v4_4_2_restore_queue,
> +};
> +
> +static void sdma_v4_4_2_set_reset_funcs(struct amdgpu_device *adev)
> +{
> +	amdgpu_sdma_register_on_reset_callbacks(adev, &sdma_v4_4_2_reset_funcs);
> +}
> +
>  static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
>  					struct amdgpu_irq_src *source,
>  					unsigned type,




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux