On 2/10/2025 1:01 PM, Jesse.zhang@xxxxxxx wrote: > From: "Jesse.zhang@xxxxxxx" <Jesse.zhang@xxxxxxx> > > This patch refactors the SDMA reset functionality in the `sdma_v4_4_2` driver > to improve modularity and support shared usage between AMDGPU and KFD. The > changes include: > > 1. **Refactored SDMA Reset Logic**: > - Split the `sdma_v4_4_2_reset_queue` function into two separate functions: > - `sdma_v4_4_2_stop_queue`: Stops the SDMA queue before reset. > - `sdma_v4_4_2_restore_queue`: Restores the SDMA queue after reset. > - These functions are now used as callbacks for the shared reset mechanism. > > 2. **Added Callback Support**: > - Introduced a new structure `sdma_v4_4_2_reset_funcs` to hold the stop and > restore callbacks. > - Added `sdma_v4_4_2_set_reset_funcs` to register these callbacks with the > shared reset mechanism using `amdgpu_set_on_reset_callbacks`. > > 3. **Fixed Reset Queue Function**: > - Modified `sdma_v4_4_2_reset_queue` to use the shared `amdgpu_sdma_reset_queue` > function, ensuring consistency across the driver. > > This patch ensures that SDMA reset functionality is more modular, reusable, and > aligned with the shared reset mechanism between AMDGPU and KFD. > > Suggested-by: Jiadong Zhu <Jiadong.Zhu@xxxxxxx> > Signed-off-by: Jesse Zhang <jesse.zhang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 32 +++++++++++++++++++++--- > 1 file changed, 28 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > index 64c163dd708f..3e60456b0db0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > @@ -105,6 +105,7 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev); > static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev); > static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); > static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); > +static void sdma_v4_4_2_set_reset_funcs(struct amdgpu_device *adev); > > static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, > u32 instance, u32 offset) > @@ -1330,6 +1331,7 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) > sdma_v4_4_2_set_vm_pte_funcs(adev); > sdma_v4_4_2_set_irq_funcs(adev); > sdma_v4_4_2_set_ras_funcs(adev); > + sdma_v4_4_2_set_reset_funcs(adev); > > return 0; > } > @@ -1605,8 +1607,14 @@ static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block) > static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) > { > struct amdgpu_device *adev = ring->adev; > - int i, r; > + u32 id = GET_INST(SDMA0, ring->me); > + return amdgpu_sdma_reset_instance(adev, id); Looks like it may be better to pass the amdgpu_ring pointer as the argument here as well as for pre/post reset calls. > +} > + > +static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) > +{ > u32 inst_mask; > + struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; > > if (amdgpu_sriov_vf(adev)) > return -EINVAL; > @@ -1617,10 +1625,16 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) > if (adev->sdma.has_page_queue) > sdma_v4_4_2_inst_page_stop(adev, inst_mask); > > - r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, ring->me)); > - if (r) > - return r; > + return 0; > +} > > +static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instance_id) > +{ > + int i; > + u32 inst_mask; > + struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; > + > + inst_mask = 1 << ring->me; > udelay(50); > > for (i = 0; i < adev->usec_timeout; i++) { > @@ -1638,6 +1652,16 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) > return sdma_v4_4_2_inst_start(adev, inst_mask, true); > } > > +static struct sdma_on_reset_funcs sdma_v4_4_2_reset_funcs = { > + .pre_reset = sdma_v4_4_2_stop_queue, It's better to call these as prereset/postreset. That makes it a bit easier to know when the function is used. Thanks, Lijo > + .post_reset = sdma_v4_4_2_restore_queue, > +}; > + > +static void sdma_v4_4_2_set_reset_funcs(struct amdgpu_device *adev) > +{ > + amdgpu_sdma_register_on_reset_callbacks(adev, &sdma_v4_4_2_reset_funcs); > +} > + > static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, > struct amdgpu_irq_src *source, > unsigned type,