Workaround to make RAS recovery work in BACO reset. Change-Id: I4e4a81f719dcc88dfd49f583c4be3a373b5eab2c Signed-off-by: Le Ma <le.ma@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 2 ++ drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 9 +++++++++ 3 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 1f26a17..919bd56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -67,6 +67,8 @@ struct amdgpu_nbio_funcs { bool enable); void (*ih_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index); + void (*enable_doorbell_interrupt)(struct amdgpu_device *adev, + bool enable); void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, bool enable); void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 238c248..0db458f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -502,6 +502,13 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, } } +static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL, + DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1); +} + const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, @@ -516,6 +523,7 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_4_ih_doorbell_range, + .enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt, .update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating, .update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep, .get_clockgating_state = nbio_v7_4_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index fc6cfbc..5cf5f11 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -493,10 +493,15 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) { void *pp_handle = adev->powerplay.pp_handle; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) return -ENOENT; + /* avoid NBIF got stuck when do RAS recovery in BACO reset */ + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, false); + /* enter BACO state */ if (pp_funcs->set_asic_baco_state(pp_handle, 1)) return -EIO; @@ -505,6 +510,10 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) if (pp_funcs->set_asic_baco_state(pp_handle, 0)) return -EIO; + /* re-enable doorbell interrupt after BACO exit */ + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, true); + dev_info(adev->dev, "GPU BACO reset\n"); adev->in_baco_reset = 1; -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx