Re: [PATCH v2] drm/amdgpu: Fix the warning info in mode1 reset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



ping...

On 1/5/2024 2:05 PM, Ma Jun wrote:
> Fix the warning below, which is printed during mode1 reset.
> [  +0.000004] Call Trace:
> [  +0.000004]  <TASK>
> [  +0.000006]  ? show_regs+0x6e/0x80
> [  +0.000011]  ? __flush_work.isra.0+0x2e8/0x390
> [  +0.000005]  ? __warn+0x91/0x150
> [  +0.000009]  ? __flush_work.isra.0+0x2e8/0x390
> [  +0.000006]  ? report_bug+0x19d/0x1b0
> [  +0.000013]  ? handle_bug+0x46/0x80
> [  +0.000012]  ? exc_invalid_op+0x1d/0x80
> [  +0.000011]  ? asm_exc_invalid_op+0x1f/0x30
> [  +0.000014]  ? __flush_work.isra.0+0x2e8/0x390
> [  +0.000007]  ? __flush_work.isra.0+0x208/0x390
> [  +0.000007]  ? _prb_read_valid+0x216/0x290
> [  +0.000008]  __cancel_work_timer+0x11d/0x1a0
> [  +0.000007]  ? try_to_grab_pending+0xe8/0x190
> [  +0.000012]  cancel_work_sync+0x14/0x20
> [  +0.000008]  amddrm_sched_stop+0x3c/0x1d0 [amd_sched]
> [  +0.000032]  amdgpu_device_gpu_recover+0x29a/0xe90 [amdgpu]
> 
> This warning started to appear after applying the patch
> "drm/sched: Convert drm scheduler to use a work queue rather than kthread".
> The root cause is that the amdgpu driver tries to use the uninitialized
> work_struct in struct drm_gpu_scheduler.
> 
> Signed-off-by: Ma Jun <Jun.Ma2@xxxxxxx>
> 
> v2:
>  - Rename the function to amdgpu_ring_sched_ready and move it to
> amdgpu_ring.c (Alex)
> 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 14 +++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +-
>  3 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 4b1d5f42249f..d0d82e69b034 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -5700,7 +5700,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>  			struct amdgpu_ring *ring = tmp_adev->rings[i];
>  
> -			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
> +			if (!amdgpu_ring_sched_ready(ring))
>  				continue;
>  
>  			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
> @@ -5776,7 +5776,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>  			struct amdgpu_ring *ring = tmp_adev->rings[i];
>  
> -			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
> +			if (!amdgpu_ring_sched_ready(ring))
>  				continue;
>  
>  			drm_sched_start(&ring->sched, true);
> @@ -6265,7 +6265,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
>  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>  		struct amdgpu_ring *ring = adev->rings[i];
>  
> -		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
> +		if (!amdgpu_ring_sched_ready(ring))
>  			continue;
>  
>  		drm_sched_start(&ring->sched, true);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 41266bc99345..9555d5532d8e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -636,7 +636,8 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
>  		DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
>  			      ring->name);
>  
> -	ring->sched.ready = !r;
> +	if (!ring->no_scheduler)
> +		ring->sched.ready = !r;
>  	return r;
>  }
>  
> @@ -719,3 +720,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
>  	if (ring->is_sw_ring)
>  		amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
>  }
> +
> +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
> +{
> +	if (!ring)
> +		return false;
> +
> +	if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
> +		return false;
> +
> +	return true;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index bbb53720a018..fe1a61eb6e4c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  int amdgpu_ib_pool_init(struct amdgpu_device *adev);
>  void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
>  int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
> -
> +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
>  #endif



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux