RE: [PATCH 6.12 097/230] drm/amdgpu/gfx9: manually control gfxoff for CS on RV

"Deucher, Alexander" <Alexander.Deucher@xxxxxxx> · Wed, 19 Feb 2025 13:47:53 +0000

[Public]

> -----Original Message-----
> From: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> Sent: Wednesday, February 19, 2025 3:27 AM
> To: stable@xxxxxxxxxxxxxxx
> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>; patches@xxxxxxxxxxxxxxx;
> Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Błażej Szczygieł <mumei6102@xxxxxxxxx>;
> Sergey Kovalenko <seryoga.engineering@xxxxxxxxx>; Deucher, Alexander
> <Alexander.Deucher@xxxxxxx>
> Subject: [PATCH 6.12 097/230] drm/amdgpu/gfx9: manually control gfxoff for CS on
> RV
>
> 6.12-stable review patch.  If anyone has any objections, please let me know.

Please drop this one as well.  I just sent a proper backport for 6.13 and 6.12.

Alex

>
> ------------------
>
> From: Alex Deucher <alexander.deucher@xxxxxxx>
>
> commit b35eb9128ebeec534eed1cefd6b9b1b7282cf5ba upstream.
>
> When mesa started using compute queues more often we started seeing additional
> hangs with compute queues.
> Disabling gfxoff seems to mitigate that.  Manually control gfxoff and gfx pg with
> command submissions to avoid any issues related to gfxoff.  KFD already does the
> same thing for these chips.
>
> v2: limit to compute
> v3: limit to APUs
> v4: limit to Raven/PCO
> v5: only update the compute ring_funcs
> v6: Disable GFX PG
> v7: adjust order
>
> Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx>
> Suggested-by: Błażej Szczygieł <mumei6102@xxxxxxxxx>
> Suggested-by: Sergey Kovalenko <seryoga.engineering@xxxxxxxxx>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3861
> Link: https://lists.freedesktop.org/archives/amd-gfx/2025-January/119116.html
> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx # 6.12.x
> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |   36 ++++++++++++++++++++++++++++++++--
>  1 file changed, 34 insertions(+), 2 deletions(-)
>
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -7415,6 +7415,38 @@ static void gfx_v9_0_ring_emit_cleaner_s
>       amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
>  }
>
> +static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
> +{
> +     struct amdgpu_device *adev = ring->adev;
> +     struct amdgpu_ip_block *gfx_block =
> +             amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
> +
> +     amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
> +
> +     /* Raven and PCO APUs seem to have stability issues
> +      * with compute and gfxoff and gfx pg.  Disable gfx pg during
> +      * submission and allow again afterwards.
> +      */
> +     if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
> +             gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE);
> +}
> +
> +static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
> +{
> +     struct amdgpu_device *adev = ring->adev;
> +     struct amdgpu_ip_block *gfx_block =
> +             amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
> +
> +     /* Raven and PCO APUs seem to have stability issues
> +      * with compute and gfxoff and gfx pg.  Disable gfx pg during
> +      * submission and allow again afterwards.
> +      */
> +     if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
> +             gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE);
> +
> +     amdgpu_gfx_enforce_isolation_ring_end_use(ring);
> +}
> +
>  static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
>       .name = "gfx_v9_0",
>       .early_init = gfx_v9_0_early_init,
> @@ -7591,8 +7623,8 @@ static const struct amdgpu_ring_funcs gf
>       .emit_wave_limit = gfx_v9_0_emit_wave_limit,
>       .reset = gfx_v9_0_reset_kcq,
>       .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
> -     .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
> -     .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
> +     .begin_use = gfx_v9_0_ring_begin_use_compute,
> +     .end_use = gfx_v9_0_ring_end_use_compute,
>  };
>
>  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>