Re: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe reset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Feb 27, 2025 at 7:36 AM Liang, Prike <Prike.Liang@xxxxxxx> wrote:
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> Please review the series patch to catch up the gfx latest base and to avoid the commit merged problem.

See my comment on patch 1:

#define RS64_FW_UC_START_ADDR_LO 0x3000

Will this potentially change in future firmware versions or is it
fixed?  If it will change, then let's just read it back from registers
and store it in adev->gfx.rs64_fw_use_start_addr_lo so
that it will be correct if the user has a mix of GPUs in the system.
Other than those comments, the series looks good to me.

Alex


>
> Regards,
>       Prike
>
> > -----Original Message-----
> > From: Liang, Prike <Prike.Liang@xxxxxxx>
> > Sent: Friday, February 21, 2025 9:01 PM
> > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Liang, Prike
> > <Prike.Liang@xxxxxxx>
> > Subject: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe
> > reset
> >
> > Implement the GFX12 KCQ pipe reset, and disable the GFX12 kernel compute
> > queue until the CPFW fully supports it.
> >
> > Signed-off-by: Prike Liang <Prike.Liang@xxxxxxx>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 90 +++++++++++++++++++++++++-
> >  1 file changed, 88 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > index 79ae7d538844..103298938d22 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > @@ -5352,6 +5352,90 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring
> > *ring, unsigned int vmid)
> >       return amdgpu_ring_test_ring(ring);
> >  }
> >
> > +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) {
> > +
> > +     struct amdgpu_device *adev = ring->adev;
> > +     uint32_t reset_pipe = 0, clean_pipe = 0;
> > +     int r;
> > +
> > +     if (!gfx_v12_pipe_reset_support(adev))
> > +             return -EOPNOTSUPP;
> > +
> > +     gfx_v12_0_set_safe_mode(adev, 0);
> > +     mutex_lock(&adev->srbm_mutex);
> > +     soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> > +
> > +     reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> > +     clean_pipe = reset_pipe;
> > +
> > +     if (adev->gfx.rs64_enable) {
> > +
> > +             switch (ring->pipe) {
> > +             case 0:
> > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > +                                                MEC_PIPE0_RESET, 1);
> > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > +                                                MEC_PIPE0_RESET, 0);
> > +                     break;
> > +             case 1:
> > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > +                                                MEC_PIPE1_RESET, 1);
> > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > +                                                MEC_PIPE1_RESET, 0);
> > +                     break;
> > +             case 2:
> > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > +                                                MEC_PIPE2_RESET, 1);
> > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > +                                                MEC_PIPE2_RESET, 0);
> > +                     break;
> > +             case 3:
> > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > +                                                MEC_PIPE3_RESET, 1);
> > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > +                                                MEC_PIPE3_RESET, 0);
> > +                     break;
> > +             default:
> > +                     break;
> > +             }
> > +             WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
> > +             WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
> > +             r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR)
> > << 2) - RS64_FW_UC_START_ADDR_LO;
> > +     } else {
> > +             switch (ring->pipe) {
> > +             case 0:
> > +                     reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> > +                                                        MEC_ME1_PIPE0_RESET,
> > 1);
> > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_CNTL,
> > +                                                        MEC_ME1_PIPE0_RESET,
> > 0);
> > +                     break;
> > +             case 1:
> > +                     reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> > +                                                        MEC_ME1_PIPE1_RESET,
> > 1);
> > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_CNTL,
> > +                                                        MEC_ME1_PIPE1_RESET,
> > 0);
> > +                     break;
> > +             default:
> > +             break;
> > +             }
> > +             WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
> > +             WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
> > +             /* Doesn't find the F32 MEC instruction pointer register, and
> > suppose
> > +              * the driver won't run into the F32 mode.
> > +              */
> > +     }
> > +
> > +     soc24_grbm_select(adev, 0, 0, 0, 0);
> > +     mutex_unlock(&adev->srbm_mutex);
> > +     gfx_v12_0_unset_safe_mode(adev, 0);
> > +
> > +     dev_info(adev->dev,"The ring %s pipe resets: %s\n", ring->name,
> > +                     r == 0 ? "successfully" : "failed");
> > +     /* Need the ring test to verify the pipe reset result.*/
> > +     return 0;
> > +
> > +}
> >  static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)  {
> >       struct amdgpu_device *adev = ring->adev; @@ -5362,8 +5446,10 @@
> > static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
> >
> >       r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
> >       if (r) {
> > -             dev_err(adev->dev, "reset via MMIO failed %d\n", r);
> > -             return r;
> > +             dev_warn(adev->dev,"fail(%d) to reset kcq  and try pipe reset\n", r);
> > +             r = gfx_v12_0_reset_compute_pipe(ring);
> > +             if (r)
> > +                     return r;
> >       }
> >
> >       r = amdgpu_bo_reserve(ring->mqd_obj, false);
> > --
> > 2.34.1
>




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux