Re: [PATCH v2 25/25] drm/i915: vlv: add runtime PM support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Apr 14, 2014 at 08:24:46PM +0300, Imre Deak wrote:
> Add runtime PM support for VLV, but leave it disabled. The next patch
> enables it.
> 
> The suspend/resume sequence used is based on [1] and [2]. In practice we
> depend on the GT RC6 mechanism to save the HW context depending on the
> render and media power wells. By the time we run the runtime suspend
> callback the display side is also off and the HW context for that is
> managed by the display power domain framework.
> 
> Besides the above there are Gunit registers that depend on a system-wide
> power well. This power well goes off once the device enters any of the
> S0i[R123] states. To handle this scenario, save/restore these Gunit
> registers. Note that this is not the complete register set dictated by
> [2]: to reduce overhead, registers that are known not to be used are
> ignored. Also, some registers are fully set up by initialization functions
> called during resume; these are not saved either. The list of registers
> can be further reduced, see the TODO note in the code.
> 
> [1] VLV_gfx_clocking_PM_reset_y12w21d3 / "Driver D3 entry/exit"
> [2] VLV2_S0IXRegs
> 
> Signed-off-by: Imre Deak <imre.deak@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_drv.c | 327 ++++++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h |  62 ++++++++
>  2 files changed, 389 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 08e210c..bc206dd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -911,6 +911,198 @@ static int hsw_runtime_resume(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> +/*
> + * Save all Gunit registers that may be lost after a D3 and a subsequent
> + * S0i[R123] transition. The list of registers needing a save/restore is
> + * defined in the VLV2_S0IXRegs document. This document marks all Gunit
> + * registers in the following way:
> + * - Driver: saved/restored by the driver
> + * - Punit : saved/restored by the Punit firmware
> + * - No, w/o marking: no need to save/restore, since the register is R/O or
> + *                    used internally by the HW in a way that doesn't depend
> + *                    on keeping the content across a suspend/resume.
> + * - Debug : used for debugging
> + *
> + * We save/restore all registers marked with 'Driver', with the following
> + * exceptions:
> + * - Registers out of use, including also registers marked with 'Debug'.
> + *   These have no effect on the driver's operation, so we don't save/restore
> + *   them to reduce the overhead.
> + * - Registers that are fully setup by an initialization function called from
> + *   the resume path. For example many clock gating and RPS/RC6 registers.
> + * - Registers that provide the right functionality with their reset defaults.
> + *
> + * TODO: Except for registers that based on the above 3 criteria can be safely
> + * ignored, we save/restore all others, practically treating the HW context as
> + * a black-box for the driver. Further investigation is needed to reduce the
> + * saved/restored registers even further, by following the same 3 criteria.
> + */
> +static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv)
> +{
> +	struct vlv_s0ix_state *s = &dev_priv->vlv_s0ix_state;
> +	int i;
> +
> +	/* GAM 0x4000-0x4770 */
> +	s->wr_watermark		= I915_READ(GEN7_WR_WATERMARK);
> +	s->gfx_prio_ctrl	= I915_READ(GEN7_GFX_PRIO_CTRL);
> +	s->arb_mode		= I915_READ(ARB_MODE);
> +	s->gfx_pend_tlb0	= I915_READ(GEN7_GFX_PEND_TLB0);
> +	s->gfx_pend_tlb1	= I915_READ(GEN7_GFX_PEND_TLB1);
> +
> +	for (i = 0; i < ARRAY_SIZE(s->lra_limits); i++)
> +		s->lra_limits[i] = I915_READ(GEN7_LRA_LIMITS_BASE + i * 4);
> +
> +	s->media_max_req_count	= I915_READ(GEN7_MEDIA_MAX_REQ_COUNT);
> +	s->gfx_max_req_count	= I915_READ(GEN7_MEDIA_MAX_REQ_COUNT);
> +
> +	s->render_hwsp		= I915_READ(RENDER_HWS_PGA_GEN7);
> +	s->ecochk		= I915_READ(GAM_ECOCHK);
> +	s->bsd_hwsp		= I915_READ(BSD_HWS_PGA_GEN7);
> +	s->blt_hwsp		= I915_READ(BLT_HWS_PGA_GEN7);
> +
> +	s->tlb_rd_addr		= I915_READ(GEN7_TLB_RD_ADDR);
> +
> +	/* MBC 0x9024-0x91D0, 0x8500 */
> +	s->g3dctl		= I915_READ(GEN7_G3DCTL);
> +	s->gsckgctl		= I915_READ(GEN7_GSCKGCTL);
> +	s->mbctl		= I915_READ(GEN6_MBCTL);
> +
> +	/* GCP 0x9400-0x9424, 0x8100-0x810C */
> +	s->ucgctl1		= I915_READ(GEN6_UCGCTL1);
> +	s->ucgctl3		= I915_READ(GEN7_UCGCTL3);
> +	s->rcgctl1		= I915_READ(GEN7_RCGCTL1);
> +	s->rcgctl2		= I915_READ(GEN7_RCGCTL2);
> +	s->rstctl		= I915_READ(GEN7_RSTCTL);
> +	s->misccpctl		= I915_READ(GEN7_MISCCPCTL);
> +
> +	/* GPM 0xA000-0xAA84, 0x8000-0x80FC */
> +	s->gfxpause		= I915_READ(GEN7_GFXPAUSE);
> +	s->rpdeuhwtc		= I915_READ(GEN7_RPDEUHWTC);
> +	s->rpdeuc		= I915_READ(GEN7_RPDEUC);
> +	s->ecobus		= I915_READ(ECOBUS);
> +	s->pwrdwnupctl		= I915_READ(VLV_PWRDWNUPCTL);
> +	s->rp_down_timeout	= I915_READ(GEN6_RP_DOWN_TIMEOUT);
> +	s->rp_deucsw		= I915_READ(GEN7_RPDEUCSW);
> +	s->rcubmabdtmr		= I915_READ(VLV_RCUBMABDTMR);
> +	s->rcedata		= I915_READ(VLV_RCEDATA);
> +	s->spare2gh		= I915_READ(VLV_SPAREG2H);
> +
> +	/* Display CZ domain, 0x4400C-0x4402C, 0x4F000-0x4F11F */
> +	s->gt_imr		= I915_READ(GTIMR);
> +	s->gt_ier		= I915_READ(GTIER);
> +	s->pm_imr		= I915_READ(GEN6_PMIIR);
                                                    ^
M  (i.e. this should read GEN6_PMIMR, not GEN6_PMIIR, to match s->pm_imr)

> +	s->pm_ier		= I915_READ(GEN6_PMIER);
> +
> +	for (i = 0; i < ARRAY_SIZE(s->gt_scratch); i++)
> +		s->gt_scratch[i] = I915_READ(GEN7_GT_SCRATCH_BASE + i * 4);
> +
> +	/* GT SA CZ domain, 0x100000-0x138124 */
> +	s->tilectl		= I915_READ(TILECTL);
> +	s->gt_fifoctl		= I915_READ(GTFIFOCTL);
> +	s->gtlc_wake_ctrl	= I915_READ(VLV_GTLC_WAKE_CTRL);
> +	s->gtlc_survive		= I915_READ(VLV_GTLC_SURVIVABILITY_REG);
> +	s->pmwgicz		= I915_READ(VLV_PMWGICZ);
> +
> +	/* Gunit-Display CZ domain, 0x182028-0x1821CF */
> +	s->gu_ctl0		= I915_READ(VLV_GU_CTL0);
> +	s->gu_ctl1		= I915_READ(VLV_GU_CTL1);
> +	s->clock_gate_dis2	= I915_READ(VLV_GUNIT_CLOCK_GATE2);
> +
> +	/*
> +	 * Not saving any of:
> +	 * DFT,		0x9800-0x9EC0
> +	 * SARB,	0xB000-0xB1FC
> +	 * GAC,		0x5208-0x524C, 0x14000-0x14C000
> +	 * PCI CFG
> +	 */
> +}
> +
> +static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv)
> +{
> +	struct vlv_s0ix_state *s = &dev_priv->vlv_s0ix_state;
> +	u32 val;
> +	int i;
> +
> +	/* GAM 0x4000-0x4770 */
> +	I915_WRITE(GEN7_WR_WATERMARK,	s->wr_watermark);
> +	I915_WRITE(GEN7_GFX_PRIO_CTRL,	s->gfx_prio_ctrl);
> +	I915_WRITE(ARB_MODE,		s->arb_mode | (0xffff << 16));
> +	I915_WRITE(GEN7_GFX_PEND_TLB0,	s->gfx_pend_tlb0);
> +	I915_WRITE(GEN7_GFX_PEND_TLB1,	s->gfx_pend_tlb1);
> +
> +	for (i = 0; i < ARRAY_SIZE(s->lra_limits); i++)
> +		I915_WRITE(GEN7_LRA_LIMITS_BASE + i * 4, s->lra_limits[i]);
> +
> +	I915_WRITE(GEN7_MEDIA_MAX_REQ_COUNT, s->media_max_req_count);
> +	I915_WRITE(GEN7_MEDIA_MAX_REQ_COUNT, s->gfx_max_req_count);
> +
> +	I915_WRITE(RENDER_HWS_PGA_GEN7,	s->render_hwsp);
> +	I915_WRITE(GAM_ECOCHK,		s->ecochk);
> +	I915_WRITE(BSD_HWS_PGA_GEN7,	s->bsd_hwsp);
> +	I915_WRITE(BLT_HWS_PGA_GEN7,	s->blt_hwsp);
> +
> +	I915_WRITE(GEN7_TLB_RD_ADDR,	s->tlb_rd_addr);
> +
> +	/* MBC 0x9024-0x91D0, 0x8500 */
> +	I915_WRITE(GEN7_G3DCTL,		s->g3dctl);
> +	I915_WRITE(GEN7_GSCKGCTL,	s->gsckgctl);
> +	I915_WRITE(GEN6_MBCTL,		s->mbctl);
> +
> +	/* GCP 0x9400-0x9424, 0x8100-0x810C */
> +	I915_WRITE(GEN6_UCGCTL1,	s->ucgctl1);
> +	I915_WRITE(GEN7_UCGCTL3,	s->ucgctl3);
> +	I915_WRITE(GEN7_RCGCTL1,	s->rcgctl1);
> +	I915_WRITE(GEN7_RCGCTL2,	s->rcgctl2);
> +	I915_WRITE(GEN7_RSTCTL,		s->rstctl);
> +	I915_WRITE(GEN7_MISCCPCTL,	s->misccpctl);
> +
> +	/* GPM 0xA000-0xAA84, 0x8000-0x80FC */
> +	I915_WRITE(GEN7_GFXPAUSE,	s->gfxpause);
> +	I915_WRITE(GEN7_RPDEUHWTC,	s->rpdeuhwtc);
> +	I915_WRITE(GEN7_RPDEUC,		s->rpdeuc);
> +	I915_WRITE(ECOBUS,		s->ecobus);
> +	I915_WRITE(VLV_PWRDWNUPCTL,	s->pwrdwnupctl);
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,s->rp_down_timeout);
> +	I915_WRITE(GEN7_RPDEUCSW,	s->rp_deucsw);
> +	I915_WRITE(VLV_RCUBMABDTMR,	s->rcubmabdtmr);
> +	I915_WRITE(VLV_RCEDATA,		s->rcedata);
> +	I915_WRITE(VLV_SPAREG2H,	s->spare2gh);
> +
> +	/* Display CZ domain, 0x4400C-0x4402C, 0x4F000-0x4F11F */
> +	I915_WRITE(GTIMR,		s->gt_imr);
> +	I915_WRITE(GTIER,		s->gt_ier);
> +	I915_WRITE(GEN6_PMIIR,		s->pm_imr);
                           ^
M  (i.e. this should write GEN6_PMIMR, not GEN6_PMIIR, to match s->pm_imr)

> +	I915_WRITE(GEN6_PMIER,		s->pm_ier);
> +
> +	for (i = 0; i < ARRAY_SIZE(s->gt_scratch); i++)
> +		I915_WRITE(GEN7_GT_SCRATCH_BASE + i * 4, s->gt_scratch[i]);
> +
> +	/* GT SA CZ domain, 0x100000-0x138124 */
> +	I915_WRITE(TILECTL,			s->tilectl);
> +	I915_WRITE(GTFIFOCTL,			s->gt_fifoctl);
> +	/*
> +	 * Preserve the GT allow wake and GFX force clock bits, they are not
> +	 * to be restored, as they are used to control the s0ix suspend/resume
> +	 * sequence by the caller.
> +	 */
> +	val = I915_READ(VLV_GTLC_WAKE_CTRL);
> +	val &= VLV_GTLC_ALLOWWAKEREQ;
> +	val |= s->gtlc_wake_ctrl & ~VLV_GTLC_ALLOWWAKEREQ;
> +	I915_WRITE(VLV_GTLC_WAKE_CTRL, val);
> +
> +	val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
> +	val &= VLV_GFX_CLK_FORCE_ON_BIT;
> +	val |= s->gtlc_survive & ~VLV_GFX_CLK_FORCE_ON_BIT;
> +	I915_WRITE(VLV_GTLC_SURVIVABILITY_REG, val);
> +
> +	I915_WRITE(VLV_PMWGICZ,			s->pmwgicz);
> +
> +	/* Gunit-Display CZ domain, 0x182028-0x1821CF */
> +	I915_WRITE(VLV_GU_CTL0,			s->gu_ctl0);
> +	I915_WRITE(VLV_GU_CTL1,			s->gu_ctl1);
> +	I915_WRITE(VLV_GUNIT_CLOCK_GATE2,	s->clock_gate_dis2);
> +}
> +
>  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on)
>  {
>  	u32 val;
> @@ -948,6 +1140,137 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on)
>  #undef COND
>  }
>  
> +static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow)
> +{
> +	u32 val;
> +	int err = 0;
> +
> +	val = I915_READ(VLV_GTLC_WAKE_CTRL);
> +	val &= ~VLV_GTLC_ALLOWWAKEREQ;
> +	if (allow)
> +		val |= VLV_GTLC_ALLOWWAKEREQ;
> +	I915_WRITE(VLV_GTLC_WAKE_CTRL, val);
> +	POSTING_READ(VLV_GTLC_WAKE_CTRL);
> +
> +#define COND (!!(I915_READ(VLV_GTLC_PW_STATUS) & VLV_GTLC_ALLOWWAKEACK) == \
> +	      allow)
> +	err = wait_for(COND, 1);
> +	if (err)
> +		DRM_ERROR("timeout disabling GT waking\n");
> +	return err;
> +#undef COND
> +}
> +
> +static int vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv,
> +				 bool wait_for_on)
> +{
> +	u32 mask;
> +	u32 val;
> +	int err;
> +
> +	mask = VLV_GTLC_PW_MEDIA_STATUS_MASK | VLV_GTLC_PW_RENDER_STATUS_MASK;
> +	val = wait_for_on ? mask : 0;
> +#define COND ((I915_READ(VLV_GTLC_PW_STATUS) & mask) == val)
> +	if (COND)
> +		return 0;
> +
> +	DRM_DEBUG_KMS("waiting for GT wells to go %s (%08x)\n",
> +			wait_for_on ? "on" : "off",
> +			I915_READ(VLV_GTLC_PW_STATUS));
> +
> +	/*
> +	 * RC6 transitioning can be delayed up to 2 msec (see
> +	 * valleyview_enable_rps), use 3 msec for safety.
> +	 */
> +	err = wait_for(COND, 3);
> +	if (err)
> +		DRM_ERROR("timeout waiting for GT wells to go %s\n",
> +			  wait_for_on ? "on" : "off");
> +
> +	return err;
> +#undef COND
> +}
> +
> +static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv)
> +{
> +	if (!(I915_READ(VLV_GTLC_PW_STATUS) & VLV_GTLC_ALLOWWAKEERR))
> +		return;
> +
> +	DRM_ERROR("GT register access while GT waking disabled\n");
> +	I915_WRITE(VLV_GTLC_PW_STATUS, VLV_GTLC_ALLOWWAKEERR);
> +}
> +
> +static int vlv_runtime_suspend(struct drm_i915_private *dev_priv)
> +{
> +	u32 mask;
> +	int err;
> +
> +	/*
> +	 * Bspec defines the following GT well on flags as debug only, so
> +	 * don't treat them as hard failures.
> +	 */
> +	(void)vlv_wait_for_gt_wells(dev_priv, false);
> +
> +	mask = VLV_GTLC_RENDER_CTX_EXISTS | VLV_GTLC_MEDIA_CTX_EXISTS;
> +	WARN_ON((I915_READ(VLV_GTLC_WAKE_CTRL) & mask) != mask);
> +
> +	vlv_check_no_gt_access(dev_priv);
> +
> +	err = vlv_force_gfx_clock(dev_priv, true);
> +	if (err)
> +		goto err1;
> +
> +	err = vlv_allow_gt_wake(dev_priv, false);
> +	if (err)
> +		goto err2;
> +	vlv_save_gunit_s0ix_state(dev_priv);
> +
> +	err = vlv_force_gfx_clock(dev_priv, false);
> +	if (err)
> +		goto err2;
> +
> +	return 0;
> +
> +err2:
> +	/* For safety always re-enable waking and disable gfx clock forcing */
> +	vlv_allow_gt_wake(dev_priv, true);
> +err1:
> +	vlv_force_gfx_clock(dev_priv, false);
> +
> +	return err;
> +}
> +
> +static int vlv_runtime_resume(struct drm_i915_private *dev_priv)
> +{
> +	struct drm_device *dev = dev_priv->dev;
> +	int err;
> +	int ret;
> +
> +	/*
> +	 * If any of the steps fail just try to continue, that's the best we
> +	 * can do at this point. Return the first error code (which will also
> +	 * leave RPM permanently disabled).
> +	 */
> +	ret = vlv_force_gfx_clock(dev_priv, true);
> +
> +	vlv_restore_gunit_s0ix_state(dev_priv);
> +
> +	err = vlv_allow_gt_wake(dev_priv, true);
> +	if (!ret)
> +		ret = err;
> +
> +	err = vlv_force_gfx_clock(dev_priv, false);
> +	if (!ret)
> +		ret = err;
> +
> +	vlv_check_no_gt_access(dev_priv);
> +
> +	intel_init_clock_gating(dev);
> +	i915_gem_restore_fences(dev);
> +
> +	return ret;
> +}
> +
>  static int intel_runtime_suspend(struct device *device)
>  {
>  	struct pci_dev *pdev = to_pci_dev(device);
> @@ -970,6 +1293,8 @@ static int intel_runtime_suspend(struct device *device)
>  		ret = 0;
>  	} if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
>  		ret = hsw_runtime_suspend(dev_priv);
> +	} else if (IS_VALLEYVIEW(dev)) {
> +		ret = vlv_runtime_suspend(dev_priv);
>  	} else {
>  		ret = -ENODEV;
>  		WARN_ON(1);
> @@ -1018,6 +1343,8 @@ static int intel_runtime_resume(struct device *device)
>  		ret = snb_runtime_resume(dev_priv);
>  	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
>  		ret = hsw_runtime_resume(dev_priv);
> +	} else if (IS_VALLEYVIEW(dev)) {
> +		ret = vlv_runtime_resume(dev_priv);
>  	} else {
>  		WARN_ON(1);
>  		ret = -ENODEV;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3cac434..77cb7fc 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -819,6 +819,67 @@ struct i915_suspend_saved_registers {
>  	u32 savePCH_PORT_HOTPLUG;
>  };
>  
> +struct vlv_s0ix_state {
> +	/* GAM */
> +	u32 wr_watermark;
> +	u32 gfx_prio_ctrl;
> +	u32 arb_mode;
> +	u32 gfx_pend_tlb0;
> +	u32 gfx_pend_tlb1;
> +	u32 lra_limits[GEN7_LRA_LIMITS_REG_NUM];
> +	u32 media_max_req_count;
> +	u32 gfx_max_req_count;
> +	u32 render_hwsp;
> +	u32 ecochk;
> +	u32 bsd_hwsp;
> +	u32 blt_hwsp;
> +	u32 tlb_rd_addr;
> +
> +	/* MBC */
> +	u32 g3dctl;
> +	u32 gsckgctl;
> +	u32 mbctl;
> +
> +	/* GCP */
> +	u32 ucgctl1;
> +	u32 ucgctl3;
> +	u32 rcgctl1;
> +	u32 rcgctl2;
> +	u32 rstctl;
> +	u32 misccpctl;
> +
> +	/* GPM */
> +	u32 gfxpause;
> +	u32 rpdeuhwtc;
> +	u32 rpdeuc;
> +	u32 ecobus;
> +	u32 pwrdwnupctl;
> +	u32 rp_down_timeout;
> +	u32 rp_deucsw;
> +	u32 rcubmabdtmr;
> +	u32 rcedata;
> +	u32 spare2gh;
> +
> +	/* Display 1 CZ domain */
> +	u32 gt_imr;
> +	u32 gt_ier;
> +	u32 pm_imr;
> +	u32 pm_ier;
> +	u32 gt_scratch[GEN7_GT_SCRATCH_REG_NUM];
> +
> +	/* GT SA CZ domain */
> +	u32 tilectl;
> +	u32 gt_fifoctl;
> +	u32 gtlc_wake_ctrl;
> +	u32 gtlc_survive;
> +	u32 pmwgicz;
> +
> +	/* Display 2 CZ domain */
> +	u32 gu_ctl0;
> +	u32 gu_ctl1;
> +	u32 clock_gate_dis2;
> +};
> +
>  struct intel_gen6_power_mgmt {
>  	/* work and pm_iir are protected by dev_priv->irq_lock */
>  	struct work_struct work;
> @@ -1447,6 +1508,7 @@ struct drm_i915_private {
>  
>  	u32 suspend_count;
>  	struct i915_suspend_saved_registers regfile;
> +	struct vlv_s0ix_state vlv_s0ix_state;
>  
>  	struct {
>  		/*
> -- 
> 1.8.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux