RE: [PATCH 1/4] drm/amdgpu/gfx11: Implement the GFX11 KGQ pipe reset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[Public]

The various gfx11/gfx12 systems share the same start PC value, but it seems better to read the start PC from the dedicated CP_ME_PRGRM_CNTR_START register instead.

Regards,
      Prike

> -----Original Message-----
> From: Alex Deucher <alexdeucher@xxxxxxxxx>
> Sent: Thursday, February 20, 2025 3:56 AM
> To: Liang, Prike <Prike.Liang@xxxxxxx>
> Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Deucher, Alexander
> <Alexander.Deucher@xxxxxxx>; Koenig, Christian
> <Christian.Koenig@xxxxxxx>; Lazar, Lijo <Lijo.Lazar@xxxxxxx>
> Subject: Re: [PATCH 1/4] drm/amdgpu/gfx11: Implement the GFX11 KGQ pipe
> reset
>
> On Sun, Jan 26, 2025 at 3:38 AM Prike Liang <Prike.Liang@xxxxxxx> wrote:
> >
> > Implement the kernel graphics queue (KGQ) pipe reset, and have the driver
> > fall back to pipe reset when the queue reset fails. However, the ME FW
> > doesn't fully support pipe reset yet, so disable the KGQ pipe reset
> > temporarily.
> >
> > Signed-off-by: Prike Liang <Prike.Liang@xxxxxxx>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 76
> > +++++++++++++++++++++++++-
> >  1 file changed, 74 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > index 89d17750af04..395872bb1401 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > @@ -65,6 +65,8 @@
> >  #define regPC_CONFIG_CNTL_1            0x194d
> >  #define regPC_CONFIG_CNTL_1_BASE_IDX   1
> >
> > +static uint32_t me_fw_start_pc;
> > +
> >  MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
> >  MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
> >  MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
> > @@ -2932,6 +2934,9 @@ static void gfx_v11_0_config_gfx_rs64(struct
> amdgpu_device *adev)
> >         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL,
> MEC_PIPE2_RESET, 0);
> >         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL,
> MEC_PIPE3_RESET, 0);
> >         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
> > +
> > +       /* cache the firmware start PC */
> > +       me_fw_start_pc = RREG32(SOC15_REG_OFFSET(GC, 0,
> > + regCP_GFX_RS64_INSTR_PNTR1));
>
> You can't use a global variable.  It won't work if you have multiple GPUs in the
> system.
>
> Alex
>
> >  }
> >
> >  static int gfx_v11_0_wait_for_rlc_autoload_complete(struct
> > amdgpu_device *adev) @@ -6654,6 +6659,68 @@ static void
> gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
> >         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */  }
> >
> > +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) {
> > +       /* Disable the pipe reset until the CPFW fully support it.*/
> > +       dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
> > +       return false;
> > +}
> > +
> > +
> > +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) {
> > +       struct amdgpu_device *adev = ring->adev;
> > +       uint32_t reset_pipe = 0, clean_pipe = 0;
> > +       int r;
> > +
> > +       if (!gfx_v11_pipe_reset_support(adev))
> > +               return -EOPNOTSUPP;
> > +
> > +       gfx_v11_0_set_safe_mode(adev, 0);
> > +       mutex_lock(&adev->srbm_mutex);
> > +       soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> > +
> > +       switch (ring->pipe) {
> > +       case 0:
> > +               reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > +                                          PFP_PIPE0_RESET, 1);
> > +               reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > +                                          ME_PIPE0_RESET, 1);
> > +               clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > +                                          PFP_PIPE0_RESET, 0);
> > +               clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > +                                          ME_PIPE0_RESET, 0);
> > +               break;
> > +       case 1:
> > +               reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > +                                          PFP_PIPE1_RESET, 1);
> > +               reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > +                                          ME_PIPE1_RESET, 1);
> > +               clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > +                                          PFP_PIPE1_RESET, 0);
> > +               clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > +                                          ME_PIPE1_RESET, 0);
> > +               break;
> > +       default:
> > +               break;
> > +       }
> > +
> > +       WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
> > +       WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
> > +
> > +       r = RREG32(SOC15_REG_OFFSET(GC, 0,
> regCP_GFX_RS64_INSTR_PNTR1)) - me_fw_start_pc;
> > +       soc21_grbm_select(adev, 0, 0, 0, 0);
> > +       mutex_unlock(&adev->srbm_mutex);
> > +       gfx_v11_0_unset_safe_mode(adev, 0);
> > +
> > +       dev_info(adev->dev,"The ring %s pipe reset to the ME firmware start
> PC: %s\n", ring->name,
> > +                       r == 0 ? "successfuly" : "failed");
> > +       /* FIXME: Sometimes driver can't cache the ME firmware start PC
> correctly, so the pipe reset status
> > +        * relies on the later gfx ring test result.
> > +        */
> > +       return 0;
> > +}
> > +
> >  static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int
> > vmid)  {
> >         struct amdgpu_device *adev = ring->adev; @@ -6663,8 +6730,13
> > @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
> >                 return -EINVAL;
> >
> >         r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
> > -       if (r)
> > -               return r;
> > +       if (r) {
> > +
> > +               dev_warn(adev->dev,"reset via MES failed and try pipe reset %d\n",
> r);
> > +               r = gfx_v11_reset_gfx_pipe(ring);
> > +               if (r)
> > +                       return r;
> > +       }
> >
> >         r = amdgpu_bo_reserve(ring->mqd_obj, false);
> >         if (unlikely(r != 0)) {
> > --
> > 2.34.1
> >




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux