Re: [PATCH 2/2] drm/amdgpu: Add SDMA queue start/stop functions and integrate with ring funcs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 11.03.25 um 09:33 schrieb Jesse.zhang@xxxxxxx:
> From: "Jesse.zhang@xxxxxxx" <Jesse.zhang@xxxxxxx>
>
> This patch introduces two new functions, `amdgpu_sdma_stop_queue` and
> `amdgpu_sdma_start_queue`, to handle the stopping and starting of SDMA queues
> during engine reset operations. The changes include:
>
> 1. **New Functions**:
>    - `amdgpu_sdma_stop_queue`: Stops the SDMA queues and the scheduler's work queue
>      for the GFX and page rings.
>    - `amdgpu_sdma_start_queue`: Starts the SDMA queues and restarts the scheduler's
>      work queue for the GFX and page rings.
>
> 2. **Integration with Ring Functions**:
>    - The `stop_queue` and `start_queue` callbacks are added to the `amdgpu_ring_funcs`
>      structure and implemented for SDMA v4.4.2.
>
> Suggested-by: Jonathan Kim <jonathan.kim@xxxxxxx>
> Signed-off-by: Jesse Zhang <Jesse.Zhang@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 92 ++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  2 +
>  4 files changed, 97 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index d55c8b7fdb59..ff9aacbdf046 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -351,6 +351,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>  		0xffffffffffffffff : ring->buf_mask;
>  	/*  Initialize cached_rptr to 0 */
>  	ring->cached_rptr = 0;
> +	atomic_set(&ring->stop_refcount, 0);
>  
>  	/* Allocate ring buffer */
>  	if (ring->is_mes_queue) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 1c52ff92ea26..7a984dbb48c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -312,6 +312,8 @@ struct amdgpu_ring {
>  	unsigned int    entry_index;
>  	/* store the cached rptr to restore after reset */
>  	uint64_t cached_rptr;
> +	/* Reference counter for stop requests */
> +	atomic_t stop_refcount;
>  
>  };
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 39669f8788a7..7cd6dcd6e7f0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -30,6 +30,7 @@
>  #define AMDGPU_CSA_SDMA_SIZE 64
>  /* SDMA CSA reside in the 3rd page of CSA */
>  #define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
> +DEFINE_MUTEX(sdma_queue_mutex);

Absolutely clear NAK to using a global mutex for this.

Regards,
Christian.

>  
>  /*
>   * GPU SDMA IP block helpers function.
> @@ -504,6 +505,97 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
>  	}
>  }
>  
> +int amdgpu_sdma_stop_queue(struct amdgpu_device *adev, uint32_t instance_id)
> +{
> +	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
> +	struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
> +	struct amdgpu_ring *page_ring = &sdma_instance->page;
> +	int r;
> +
> +	mutex_lock(&sdma_queue_mutex);
> +
> +	/* Avoid accidentally unparking the sched thread during GPU reset */
> +	r = down_read_killable(&adev->reset_domain->sem);
> +	if (r)
> +		goto exit;
> +
> +	/* Increment the reference counter */
> +	atomic_inc(&gfx_ring->stop_refcount);
> +	if (adev->sdma.has_page_queue)
> +		atomic_inc(&page_ring->stop_refcount);
> +
> +	if (atomic_read(&gfx_ring->stop_refcount) != 1 ||
> +	   (adev->sdma.has_page_queue && atomic_read(&page_ring->stop_refcount) != 1)) {
> +		up_read(&adev->reset_domain->sem);
> +		r = -EBUSY;
> +		goto exit;
> +	}
> +
> +	if (!amdgpu_ring_sched_ready(gfx_ring))
> +		drm_sched_wqueue_stop(&gfx_ring->sched);
> +
> +	if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(page_ring))
> +		drm_sched_wqueue_stop(&page_ring->sched);
> +
> +	if (gfx_ring->funcs && gfx_ring->funcs->stop_queue)
> +		gfx_ring->funcs->stop_queue(adev, instance_id);
> +
> +	if (adev->sdma.has_page_queue && page_ring->funcs && page_ring->funcs->stop_queue)
> +		page_ring->funcs->stop_queue(adev, instance_id);
> +
> +	up_read(&adev->reset_domain->sem);
> +
> +exit:
> +	mutex_unlock(&sdma_queue_mutex);
> +	return r;
> +}
> +
> +int amdgpu_sdma_start_queue(struct amdgpu_device *adev, uint32_t instance_id)
> +{
> +	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
> +	struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
> +	struct amdgpu_ring *page_ring = &sdma_instance->page;
> +	int r;
> +
> +	mutex_lock(&sdma_queue_mutex);
> +
> +	/* Avoid accidentally unparking the sched thread during GPU reset */
> +	r = down_read_killable(&adev->reset_domain->sem);
> +	if (r)
> +		goto exit;
> +
> +	/* Decrement the reference counter */
> +	atomic_dec(&gfx_ring->stop_refcount);
> +	if (adev->sdma.has_page_queue)
> +		atomic_dec(&page_ring->stop_refcount);
> +
> +	if (atomic_read(&gfx_ring->stop_refcount) != 0 ||
> +	   (adev->sdma.has_page_queue && atomic_read(&page_ring->stop_refcount) != 0)) {
> +		up_read(&adev->reset_domain->sem);
> +		r = -EBUSY;
> +		goto exit;
> +	}
> +
> +	if (gfx_ring->funcs && gfx_ring->funcs->start_queue)
> +		gfx_ring->funcs->start_queue(adev, instance_id);
> +
> +	if (adev->sdma.has_page_queue && page_ring->funcs && page_ring->funcs->start_queue)
> +		page_ring->funcs->start_queue(adev, instance_id);
> +
> +	/* Restart the scheduler's work queue for the GFX and page rings */
> +	if (amdgpu_ring_sched_ready(gfx_ring))
> +		drm_sched_wqueue_start(&gfx_ring->sched);
> +
> +	if (amdgpu_ring_sched_ready(page_ring))
> +		drm_sched_wqueue_start(&page_ring->sched);
> +
> +	up_read(&adev->reset_domain->sem);
> +
> +exit:
> +	mutex_unlock(&sdma_queue_mutex);
> +	return r;
> +}
> +
>  /**
>   * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
>   * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index 965169320065..a91791fa3ecf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -170,6 +170,8 @@ struct amdgpu_buffer_funcs {
>  
>  void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs);
>  int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues);
> +int amdgpu_sdma_stop_queue(struct amdgpu_device *adev, uint32_t instance_id);
> +int amdgpu_sdma_start_queue(struct amdgpu_device *adev, uint32_t instance_id);
>  
>  #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b), (t))
>  #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux