Currently if the GMU resume function fails all we try to do is clear the BOOT_SLUMBER oob which usually times out and ends up in a cycle of death. If the resume function fails at any point remove any RPMh votes that might have been added and try to shut down the GMU hardware cleanly. Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 82 +++++++++++++++++++----------- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 20 ++------ drivers/gpu/drm/msm/adreno/adreno_device.c | 1 + 3 files changed, 58 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index e16d55d..2e89ca3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -638,20 +638,6 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR | \ A6XX_GMU_AO_HOST_INTERRUPT_STATUS_FENCE_ERR) -static void a6xx_gmu_irq_enable(struct a6xx_gmu *gmu) -{ - gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0); - gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, ~0); - - gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK, - ~A6XX_GMU_IRQ_MASK); - gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, - ~A6XX_HFI_IRQ_MASK); - - enable_irq(gmu->gmu_irq); - enable_irq(gmu->hfi_irq); -} - static void a6xx_gmu_irq_disable(struct a6xx_gmu *gmu) { disable_irq(gmu->gmu_irq); @@ -661,11 +647,24 @@ static void a6xx_gmu_irq_disable(struct a6xx_gmu *gmu) gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~0); } -/* Force the GMU off in case it isn't responsive */ -static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) +static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu) { u32 val; + /* Make sure there are no outstanding RPMh votes */ + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS0_DRV0_STATUS, val, + (val & 1), 100, 10000); + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS1_DRV0_STATUS, val, + (val & 1), 100, 10000); + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS2_DRV0_STATUS, val, + (val & 1), 100, 10000); + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS, val, + (val & 1), 100, 1000); +} + +/* Force the GMU off in case it isn't responsive */ +static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) +{ /* Flush all the queues */ a6xx_hfi_stop(gmu); @@ -676,14 +675,7 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) a6xx_sptprac_disable(gmu); /* Make sure there are no outstanding RPMh votes */ - gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS0_DRV0_STATUS, val, - (val & 1), 100, 10000); - gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS1_DRV0_STATUS, val, - (val & 1), 100, 10000); - gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS2_DRV0_STATUS, val, - (val & 1), 100, 10000); - gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS, val, - (val & 1), 100, 1000); + a6xx_gmu_rpmh_off(gmu); } int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) @@ -702,10 +694,15 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) /* Use a known rate to bring up the GMU */ clk_set_rate(gmu->core_clk, 200000000); ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); - if (ret) - goto out; + if (ret) { + pm_runtime_put(gmu->dev); + return ret; + } - a6xx_gmu_irq_enable(gmu); + /* Enable the GMU interrupt */ + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0); + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK, ~A6XX_GMU_IRQ_MASK); + enable_irq(gmu->gmu_irq); /* Check to see if we are doing a cold or warm boot */ status = gmu_read(gmu, REG_A6XX_GMU_GENERAL_7) == 1 ? @@ -716,6 +713,16 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) goto out; ret = a6xx_hfi_start(gmu, status); + if (ret) + goto out; + + /* + * Turn on the GMU firmware fault interrupt after we know the boot + * sequence is successful + */ + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, ~0); + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK); + enable_irq(gmu->hfi_irq); /* Set the GPU to the highest power frequency */ __a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1); @@ -729,9 +736,12 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) pm_runtime_get(gmu->gxpd); out: - /* Make sure to turn off the boot OOB request on error */ - if (ret) - a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER); + /* On failure, shut down the GMU to leave it in a good state */ + if (ret) { + disable_irq(gmu->gmu_irq); + a6xx_rpmh_stop(gmu); + pm_runtime_put(gmu->dev); + } return ret; } @@ -754,6 +764,9 @@ bool a6xx_gmu_isidle(struct a6xx_gmu *gmu) /* Gracefully try to shut down the GMU and by extension the GPU */ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) { + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; u32 val; /* @@ -771,6 +784,12 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) return; } + /* Clear the VBIF pipe before shutting down */ + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf); + spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf) + == 0xf); + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); + /* tell the GMU we want to slumber */ a6xx_gmu_notify_slumber(gmu); @@ -808,6 +827,9 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu) { struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + if (!pm_runtime_active(gmu->dev)) + return 0; + /* * Force the GMU off if we detected a hang, otherwise try to shut it * down gracefully diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index f76d8cd..576559a 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -678,13 +678,15 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); int ret; - ret = a6xx_gmu_resume(a6xx_gpu); - gpu->needs_hw_init = true; + ret = a6xx_gmu_resume(a6xx_gpu); + if (ret) + return ret; + msm_gpu_resume_devfreq(gpu); - return ret; + return 0; } static int a6xx_pm_suspend(struct msm_gpu *gpu) @@ -694,18 +696,6 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) devfreq_suspend_device(gpu->devfreq.devfreq); - /* - * Make sure the GMU is idle before continuing (because some transitions - * may use VBIF - */ - a6xx_gmu_wait_for_idle(&a6xx_gpu->gmu); - - /* Clear the VBIF pipe before shutting down */ - /* FIXME: This accesses the GPU - do we need to make sure it is on? */ - gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf); - spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf) == 0xf); - gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); - return a6xx_gmu_stop(a6xx_gpu); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 714ed65..0d87db7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -229,6 +229,7 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) { + pm_runtime_put_sync(&pdev->dev); DRM_DEV_ERROR(dev->dev, "Couldn't power up the GPU: %d\n", ret); return NULL; } -- 2.7.4