On Fri, Feb 21, 2025 at 8:38 AM Prike Liang <Prike.Liang@xxxxxxx> wrote:
>
> Implement the kernel graphics queue pipe reset,and the driver
> will fallback to pipe reset when the queue reset fails. However,
> the ME FW hasn't fully supported pipe reset yet so disable the
> KGQ pipe reset temporarily.
>
> Signed-off-by: Prike Liang <Prike.Liang@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  2 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 71 ++++++++++++++++++++++-
>  2 files changed, 71 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> index 4eedd92f000b..06fe21e15ed6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> @@ -25,6 +25,8 @@
>
>  #include "amdgpu_socbb.h"
>
> +#define RS64_FW_UC_START_ADDR_LO 0x3000
> +
>  struct common_firmware_header {
>  	uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
>  	uint32_t header_size_bytes; /* size of just the header in bytes */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 2c7f0bb242ff..7e53c0b63f88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -6653,6 +6653,68 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
>  	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
>  }
>
> +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
> +{
> +	/* Disable the pipe reset until the CPFW fully support it.*/
> +	dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");

I'd drop these or make them debug only for now. Same for gfx12.
With that fixed, the series is:
Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>

> +	return false;
> +}
> +
> +
> +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	uint32_t reset_pipe = 0, clean_pipe = 0;
> +	int r;
> +
> +	if (!gfx_v11_pipe_reset_support(adev))
> +		return -EOPNOTSUPP;
> +
> +	gfx_v11_0_set_safe_mode(adev, 0);
> +	mutex_lock(&adev->srbm_mutex);
> +	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> +
> +	switch (ring->pipe) {
> +	case 0:
> +		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> +					   PFP_PIPE0_RESET, 1);
> +		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> +					   ME_PIPE0_RESET, 1);
> +		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> +					   PFP_PIPE0_RESET, 0);
> +		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> +					   ME_PIPE0_RESET, 0);
> +		break;
> +	case 1:
> +		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> +					   PFP_PIPE1_RESET, 1);
> +		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> +					   ME_PIPE1_RESET, 1);
> +		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> +					   PFP_PIPE1_RESET, 0);
> +		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> +					   ME_PIPE1_RESET, 0);
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
> +	WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
> +
> +	r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - RS64_FW_UC_START_ADDR_LO;
> +	soc21_grbm_select(adev, 0, 0, 0, 0);
> +	mutex_unlock(&adev->srbm_mutex);
> +	gfx_v11_0_unset_safe_mode(adev, 0);
> +
> +	dev_info(adev->dev,"The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
> +			r == 0 ? "successfuly" : "failed");
> +	/* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, so the pipe reset status
> +	 * relies on the later gfx ring test result.
> +	 */
> +	return 0;
> +}
> +
>  static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> @@ -6662,8 +6724,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
>  		return -EINVAL;
>
>  	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
> -	if (r)
> -		return r;
> +	if (r) {
> +
> +		dev_warn(adev->dev,"reset via MES failed and try pipe reset %d\n", r);
> +		r = gfx_v11_reset_gfx_pipe(ring);
> +		if (r)
> +			return r;
> +	}
>
>  	r = amdgpu_bo_reserve(ring->mqd_obj, false);
>  	if (unlikely(r != 0)) {
> --
> 2.34.1
>