On 3/17/2025 7:17 PM, Alex Deucher wrote: > The gfx and page queues are per instance, so track them > per instance. > > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 7 ++++--- > drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 18 +++++++++++------- > 2 files changed, 15 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > index 8e7e794ff136f..dc1a81c2f9af7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > @@ -65,6 +65,10 @@ struct amdgpu_sdma_instance { > uint64_t sdma_fw_gpu_addr; > uint32_t *sdma_fw_ptr; > struct mutex engine_reset_mutex; > + /* track guilty state of GFX and PAGE queues */ > + bool gfx_guilty; > + bool page_guilty; > + > }; > > enum amdgpu_sdma_ras_memory_id { > @@ -127,9 +131,6 @@ struct amdgpu_sdma { > uint32_t *ip_dump; > uint32_t supported_reset; > struct list_head reset_callback_list; > - /* track guilty state of GFX and PAGE queues */ > - bool gfx_guilty; > - bool page_guilty; > }; > > /* > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > index 927db7a080f0a..c485b582a20f6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > @@ -989,9 +989,10 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, > uint32_t temp; > > WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); > - sdma_v4_4_2_gfx_resume(adev, i, restore, adev->sdma.gfx_guilty); > + sdma_v4_4_2_gfx_resume(adev, i, restore, adev->sdma.instance[i].gfx_guilty); > if (adev->sdma.has_page_queue) > - sdma_v4_4_2_page_resume(adev, i, restore, adev->sdma.page_guilty); > + sdma_v4_4_2_page_resume(adev, i, restore, > + adev->sdma.instance[i].page_guilty); I think passing the flag is no longer be required as the instance id is passed already. Thanks, Lijo > > /* set utc l1 enable flag always to 1 */ > temp = RREG32_SDMA(i, regSDMA_CNTL); > @@ -1446,6 +1447,10 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) > > for (i = 0; i < adev->sdma.num_instances; i++) { > mutex_init(&adev->sdma.instance[i].engine_reset_mutex); > + /* Initialize guilty flags for GFX and PAGE queues */ > + adev->sdma.instance[i].gfx_guilty = false; > + adev->sdma.instance[i].page_guilty = false; > + > ring = &adev->sdma.instance[i].ring; > ring->ring_obj = NULL; > ring->use_doorbell = true; > @@ -1507,9 +1512,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) > r = amdgpu_sdma_sysfs_reset_mask_init(adev); > if (r) > return r; > - /* Initialize guilty flags for GFX and PAGE queues */ > - adev->sdma.gfx_guilty = false; > - adev->sdma.page_guilty = false; > > return r; > } > @@ -1686,9 +1688,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_ > return -EINVAL; > > /* Check if this queue is the guilty one */ > - adev->sdma.gfx_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, false); > + adev->sdma.instance[instance_id].gfx_guilty = > + sdma_v4_4_2_is_queue_selected(adev, instance_id, false); > if (adev->sdma.has_page_queue) > - adev->sdma.page_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, true); > + adev->sdma.instance[instance_id].page_guilty = > + sdma_v4_4_2_is_queue_selected(adev, instance_id, true); > > /* Cache the rptr before reset, after the reset, > * all of the registers will be reset to 0