From: "Jesse.zhang@xxxxxxx" <Jesse.zhang@xxxxxxx> This patch introduces two new functions, `amdgpu_sdma_stop_queue` and `amdgpu_sdma_start_queue`, to handle the stopping and starting of SDMA queues during engine reset operations. The changes include: 1. **New Functions**: - `amdgpu_sdma_stop_queue`: Stops the SDMA queues and the scheduler's work queue for the GFX and page rings. - `amdgpu_sdma_start_queue`: Starts the SDMA queues and restarts the scheduler's work queue for the GFX and page rings. 2. **Integration with Ring Functions**: - The `stop_queue` and `start_queue` callbacks are added to the `amdgpu_ring_funcs` structure and implemented for SDMA v4.4.2. Suggested-by:Jonathan Kim <jonathan.kim@xxxxxxx> Signed-off-by: Jesse Zhang <Jesse.Zhang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 92 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 + 4 files changed, 97 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d55c8b7fdb59..ff9aacbdf046 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -351,6 +351,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, 0xffffffffffffffff : ring->buf_mask; /* Initialize cached_rptr to 0 */ ring->cached_rptr = 0; + atomic_set(&ring->stop_refcount, 0); /* Allocate ring buffer */ if (ring->is_mes_queue) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1c52ff92ea26..7a984dbb48c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -312,6 +312,8 @@ struct amdgpu_ring { unsigned int entry_index; /* store the cached rptr to restore after reset */ uint64_t cached_rptr; + /* Reference counter for stop requests */ + atomic_t stop_refcount; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 39669f8788a7..7cd6dcd6e7f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -30,6 +30,7 @@ #define AMDGPU_CSA_SDMA_SIZE 64 /* SDMA CSA reside in the 3rd page of CSA */ #define AMDGPU_CSA_SDMA_OFFSET (4096 * 2) +DEFINE_MUTEX(sdma_queue_mutex); /* * GPU SDMA IP block helpers function. 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 92 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  2 +
 4 files changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d55c8b7fdb59..ff9aacbdf046 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -351,6 +351,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		0xffffffffffffffff : ring->buf_mask;
 	/* Initialize cached_rptr to 0 */
 	ring->cached_rptr = 0;
+	atomic_set(&ring->stop_refcount, 0);
 
 	/* Allocate ring buffer */
 	if (ring->is_mes_queue) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1c52ff92ea26..7a984dbb48c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -312,6 +312,8 @@ struct amdgpu_ring {
 	unsigned int    entry_index;
 	/* store the cached rptr to restore after reset */
 	uint64_t cached_rptr;
+	/* Reference counter for stop requests */
+	atomic_t stop_refcount;
 
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 39669f8788a7..7cd6dcd6e7f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -30,6 +30,7 @@
 #define AMDGPU_CSA_SDMA_SIZE 64
 /* SDMA CSA reside in the 3rd page of CSA */
 #define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
+DEFINE_MUTEX(sdma_queue_mutex);
 
 /*
  * GPU SDMA IP block helpers function.
@@ -504,6 +505,97 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
 	}
 }
 
+int amdgpu_sdma_stop_queue(struct amdgpu_device *adev, uint32_t instance_id)
+{
+	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+	struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+	struct amdgpu_ring *page_ring = &sdma_instance->page;
+	int r;
+
+	mutex_lock(&sdma_queue_mutex);
+
+	/* Avoid accidentally unparking the sched thread during GPU reset */
+	r = down_read_killable(&adev->reset_domain->sem);
+	if (r)
+		goto exit;
+
+	/* Increment the reference counter */
+	atomic_inc(&gfx_ring->stop_refcount);
+	if (adev->sdma.has_page_queue)
+		atomic_inc(&page_ring->stop_refcount);
+
+	if (atomic_read(&gfx_ring->stop_refcount) != 1 ||
+	    (adev->sdma.has_page_queue && atomic_read(&page_ring->stop_refcount) != 1)) {
+		up_read(&adev->reset_domain->sem);
+		r = -EBUSY;
+		goto exit;
+	}
+
+	if (amdgpu_ring_sched_ready(gfx_ring))
+		drm_sched_wqueue_stop(&gfx_ring->sched);
+
+	if (adev->sdma.has_page_queue && amdgpu_ring_sched_ready(page_ring))
+		drm_sched_wqueue_stop(&page_ring->sched);
+
+	if (gfx_ring->funcs && gfx_ring->funcs->stop_queue)
+		gfx_ring->funcs->stop_queue(adev, instance_id);
+
+	if (adev->sdma.has_page_queue && page_ring->funcs && page_ring->funcs->stop_queue)
+		page_ring->funcs->stop_queue(adev, instance_id);
+
+	up_read(&adev->reset_domain->sem);
+
+exit:
+	mutex_unlock(&sdma_queue_mutex);
+	return r;
+}
+
+int amdgpu_sdma_start_queue(struct amdgpu_device *adev, uint32_t instance_id)
+{
+	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+	struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+	struct amdgpu_ring *page_ring = &sdma_instance->page;
+	int r;
+
+	mutex_lock(&sdma_queue_mutex);
+
+	/* Avoid accidentally unparking the sched thread during GPU reset */
+	r = down_read_killable(&adev->reset_domain->sem);
+	if (r)
+		goto exit;
+
+	/* Decrement the reference counter */
+	atomic_dec(&gfx_ring->stop_refcount);
+	if (adev->sdma.has_page_queue)
+		atomic_dec(&page_ring->stop_refcount);
+
+	if (atomic_read(&gfx_ring->stop_refcount) != 0 ||
+	    (adev->sdma.has_page_queue && atomic_read(&page_ring->stop_refcount) != 0)) {
+		up_read(&adev->reset_domain->sem);
+		r = -EBUSY;
+		goto exit;
+	}
+
+	if (gfx_ring->funcs && gfx_ring->funcs->start_queue)
+		gfx_ring->funcs->start_queue(adev, instance_id);
+
+	if (adev->sdma.has_page_queue && page_ring->funcs && page_ring->funcs->start_queue)
+		page_ring->funcs->start_queue(adev, instance_id);
+
+	/* Restart the scheduler's work queue for the GFX and page rings */
+	if (amdgpu_ring_sched_ready(gfx_ring))
+		drm_sched_wqueue_start(&gfx_ring->sched);
+
+	if (adev->sdma.has_page_queue && amdgpu_ring_sched_ready(page_ring))
+		drm_sched_wqueue_start(&page_ring->sched);
+
+	up_read(&adev->reset_domain->sem);
+
+exit:
+	mutex_unlock(&sdma_queue_mutex);
+	return r;
+}
+
 /**
  * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
  * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 965169320065..a91791fa3ecf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -170,6 +170,8 @@ struct amdgpu_buffer_funcs {
 void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs);
 int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues);
+int amdgpu_sdma_stop_queue(struct amdgpu_device *adev, uint32_t instance_id);
+int amdgpu_sdma_start_queue(struct amdgpu_device *adev, uint32_t instance_id);
 
 #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
 #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
-- 
2.25.1