[Public] The various gfx11/gfx12 systems share the same start PC value, but it seems better to use the specific register CP_ME_PRGRM_CNTR_START to get the start PC value. Regards, Prike > -----Original Message----- > From: Alex Deucher <alexdeucher@xxxxxxxxx> > Sent: Thursday, February 20, 2025 3:56 AM > To: Liang, Prike <Prike.Liang@xxxxxxx> > Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Deucher, Alexander > <Alexander.Deucher@xxxxxxx>; Koenig, Christian > <Christian.Koenig@xxxxxxx>; Lazar, Lijo <Lijo.Lazar@xxxxxxx> > Subject: Re: [PATCH 1/4] drm/amdgpu/gfx11: Implement the GFX11 KGQ pipe > reset > > On Sun, Jan 26, 2025 at 3:38 AM Prike Liang <Prike.Liang@xxxxxxx> wrote: > > > > Implement the kernel graphics queue pipe reset, and the driver will > > fall back to pipe reset when the queue reset fails. However, the ME FW > > doesn't fully support pipe reset yet, so disable the KGQ pipe reset > > temporarily. > > > > Signed-off-by: Prike Liang <Prike.Liang@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 76 > > +++++++++++++++++++++++++- > > 1 file changed, 74 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > index 89d17750af04..395872bb1401 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > @@ -65,6 +65,8 @@ > > #define regPC_CONFIG_CNTL_1 0x194d > > #define regPC_CONFIG_CNTL_1_BASE_IDX 1 > > > > +static uint32_t me_fw_start_pc; > > + > > MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); > > MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); > > MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); > > @@ -2932,6 +2934,9 @@ static void gfx_v11_0_config_gfx_rs64(struct > amdgpu_device *adev) > > tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, > MEC_PIPE2_RESET, 0); > > tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, > MEC_PIPE3_RESET, 0); > > WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); > > + > > + /* cache the firmware start PC */ > > + 
me_fw_start_pc = RREG32(SOC15_REG_OFFSET(GC, 0, > > + regCP_GFX_RS64_INSTR_PNTR1)); > > You can't use a global variable. It won't work if you have multiple GPUs in the > system. > > Alex > > > } > > > > static int gfx_v11_0_wait_for_rlc_autoload_complete(struct > > amdgpu_device *adev) @@ -6654,6 +6659,68 @@ static void > gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) > > amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } > > > > +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) { > > + /* Disable the pipe reset until the CPFW fully support it.*/ > > + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); > > + return false; > > +} > > + > > + > > +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) { > > + struct amdgpu_device *adev = ring->adev; > > + uint32_t reset_pipe = 0, clean_pipe = 0; > > + int r; > > + > > + if (!gfx_v11_pipe_reset_support(adev)) > > + return -EOPNOTSUPP; > > + > > + gfx_v11_0_set_safe_mode(adev, 0); > > + mutex_lock(&adev->srbm_mutex); > > + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > > + > > + switch (ring->pipe) { > > + case 0: > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, > > + PFP_PIPE0_RESET, 1); > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, > > + ME_PIPE0_RESET, 1); > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, > > + PFP_PIPE0_RESET, 0); > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, > > + ME_PIPE0_RESET, 0); > > + break; > > + case 1: > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, > > + PFP_PIPE1_RESET, 1); > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, > > + ME_PIPE1_RESET, 1); > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, > > + PFP_PIPE1_RESET, 0); > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, > > + ME_PIPE1_RESET, 0); > > + break; > > + default: > > + break; > > + } > > + > > + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); > > + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); 
> > + > > + r = RREG32(SOC15_REG_OFFSET(GC, 0, > regCP_GFX_RS64_INSTR_PNTR1)) - me_fw_start_pc; > > + soc21_grbm_select(adev, 0, 0, 0, 0); > > + mutex_unlock(&adev->srbm_mutex); > > + gfx_v11_0_unset_safe_mode(adev, 0); > > + > > + dev_info(adev->dev,"The ring %s pipe reset to the ME firmware start > PC: %s\n", ring->name, > > + r == 0 ? "successfuly" : "failed"); > > + /* FIXME: Sometimes driver can't cache the ME firmware start PC > correctly, so the pipe reset status > > + * relies on the later gfx ring test result. > > + */ > > + return 0; > > +} > > + > > static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int > > vmid) { > > struct amdgpu_device *adev = ring->adev; @@ -6663,8 +6730,13 > > @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) > > return -EINVAL; > > > > r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); > > - if (r) > > - return r; > > + if (r) { > > + > > + dev_warn(adev->dev,"reset via MES failed and try pipe reset %d\n", > r); > > + r = gfx_v11_reset_gfx_pipe(ring); > > + if (r) > > + return r; > > + } > > > > r = amdgpu_bo_reserve(ring->mqd_obj, false); > > if (unlikely(r != 0)) { > > -- > > 2.34.1 > >