On Wed, 2014-04-16 at 16:53 +0200, Daniel Vetter wrote: > On Mon, Apr 14, 2014 at 08:24:46PM +0300, Imre Deak wrote: > > Add runtime PM support for VLV, but leave it disabled. The next patch > > enables it. > > > > The suspend/resume sequence used is based on [1] and [2]. In practice we > > depend on the GT RC6 mechanism to save the HW context depending on the > > render and media power wells. By the time we run the runtime suspend > > callback the display side is also off and the HW context for that is > > managed by the display power domain framework. > > > > Besides the above there are Gunit registers that depend on a system-wide > > power well. This power well goes off once the device enters any of the > > S0i[R123] states. To handle this scenario, save/restore these Gunit > > registers. Note that this is not the complete register set dictated by > > [2], to remove some overhead, registers that are known not to be used are > > ignored. Also some registers are fully setup by initialization functions > > called during resume, these are not saved either. The list of registers > > can be further reduced, see the TODO note in the code. > > > > [1] VLV_gfx_clocking_PM_reset_y12w21d3 / "Driver D3 entry/exit" > > [2] VLV2_S0IXRegs > > > > Signed-off-by: Imre Deak <imre.deak@xxxxxxxxx> > > --- > > drivers/gpu/drm/i915/i915_drv.c | 327 ++++++++++++++++++++++++++++++++++++++++ > > drivers/gpu/drm/i915/i915_drv.h | 62 ++++++++ > > 2 files changed, 389 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > > index 08e210c..bc206dd 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.c > > +++ b/drivers/gpu/drm/i915/i915_drv.c > > @@ -911,6 +911,198 @@ static int hsw_runtime_resume(struct drm_i915_private *dev_priv) > > return 0; > > } > > > > +/* > > + * Save all Gunit registers that may be lost after a D3 and a subsequent > > + * S0i[R123] transition. 
The list of registers needing a save/restore is > > + * defined in the VLV2_S0IXRegs document. This document marks all Gunit > > + * registers in the following way: > > + * - Driver: saved/restored by the driver > > + * - Punit : saved/restored by the Punit firmware > > + * - No, w/o marking: no need to save/restore, since the register is R/O or > > + * used internally by the HW in a way that doesn't depend on > > + * keeping the content across a suspend/resume. > > + * - Debug : used for debugging > > + * > > + * We save/restore all registers marked with 'Driver', with the following > > + * exceptions: > > + * - Registers out of use, including also registers marked with 'Debug'. > > + * These have no effect on the driver's operation, so we don't save/restore > > + * them to reduce the overhead. > > + * - Registers that are fully set up by an initialization function called from > > + * the resume path. For example many clock gating and RPS/RC6 registers. > > + * - Registers that provide the right functionality with their reset defaults. > > + * > > + * TODO: Except for registers that based on the above 3 criteria can be safely > > + * ignored, we save/restore all others, practically treating the HW context as > > + * a black-box for the driver. Further investigation is needed to reduce the > > + * saved/restored registers even further, by following the same 3 criteria. 
> > + */ > > +static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv) > > +{ > > + struct vlv_s0ix_state *s = &dev_priv->vlv_s0ix_state; > > + int i; > > + > > + /* GAM 0x4000-0x4770 */ > > + s->wr_watermark = I915_READ(GEN7_WR_WATERMARK); > > + s->gfx_prio_ctrl = I915_READ(GEN7_GFX_PRIO_CTRL); > > + s->arb_mode = I915_READ(ARB_MODE); > > + s->gfx_pend_tlb0 = I915_READ(GEN7_GFX_PEND_TLB0); > > + s->gfx_pend_tlb1 = I915_READ(GEN7_GFX_PEND_TLB1); > > + > > + for (i = 0; i < ARRAY_SIZE(s->lra_limits); i++) > > + s->lra_limits[i] = I915_READ(GEN7_LRA_LIMITS_BASE + i * 4); > > + > > + s->media_max_req_count = I915_READ(GEN7_MEDIA_MAX_REQ_COUNT); > > + s->gfx_max_req_count = I915_READ(GEN7_MEDIA_MAX_REQ_COUNT); > > + > > + s->render_hwsp = I915_READ(RENDER_HWS_PGA_GEN7); > > + s->ecochk = I915_READ(GAM_ECOCHK); > > + s->bsd_hwsp = I915_READ(BSD_HWS_PGA_GEN7); > > + s->blt_hwsp = I915_READ(BLT_HWS_PGA_GEN7); > > + > > + s->tlb_rd_addr = I915_READ(GEN7_TLB_RD_ADDR); > > + > > + /* MBC 0x9024-0x91D0, 0x8500 */ > > + s->g3dctl = I915_READ(GEN7_G3DCTL); > > + s->gsckgctl = I915_READ(GEN7_GSCKGCTL); > > + s->mbctl = I915_READ(GEN6_MBCTL); > > + > > + /* GCP 0x9400-0x9424, 0x8100-0x810C */ > > + s->ucgctl1 = I915_READ(GEN6_UCGCTL1); > > + s->ucgctl3 = I915_READ(GEN7_UCGCTL3); > > + s->rcgctl1 = I915_READ(GEN7_RCGCTL1); > > + s->rcgctl2 = I915_READ(GEN7_RCGCTL2); > > + s->rstctl = I915_READ(GEN7_RSTCTL); > > + s->misccpctl = I915_READ(GEN7_MISCCPCTL); > > + > > + /* GPM 0xA000-0xAA84, 0x8000-0x80FC */ > > + s->gfxpause = I915_READ(GEN7_GFXPAUSE); > > + s->rpdeuhwtc = I915_READ(GEN7_RPDEUHWTC); > > + s->rpdeuc = I915_READ(GEN7_RPDEUC); > > + s->ecobus = I915_READ(ECOBUS); > > + s->pwrdwnupctl = I915_READ(VLV_PWRDWNUPCTL); > > + s->rp_down_timeout = I915_READ(GEN6_RP_DOWN_TIMEOUT); > > + s->rp_deucsw = I915_READ(GEN7_RPDEUCSW); > > + s->rcubmabdtmr = I915_READ(VLV_RCUBMABDTMR); > > + s->rcedata = I915_READ(VLV_RCEDATA); > > + s->spare2gh = I915_READ(VLV_SPAREG2H); 
> > + > > + /* Display CZ domain, 0x4400C-0x4402C, 0x4F000-0x4F11F */ > > + s->gt_imr = I915_READ(GTIMR); > > + s->gt_ier = I915_READ(GTIER); > > + s->pm_imr = I915_READ(GEN6_PMIIR); > > + s->pm_ier = I915_READ(GEN6_PMIER); > > + > > + for (i = 0; i < ARRAY_SIZE(s->gt_scratch); i++) > > + s->gt_scratch[i] = I915_READ(GEN7_GT_SCRATCH_BASE + i * 4); > > + > > + /* GT SA CZ domain, 0x100000-0x138124 */ > > + s->tilectl = I915_READ(TILECTL); > > + s->gt_fifoctl = I915_READ(GTFIFOCTL); > > + s->gtlc_wake_ctrl = I915_READ(VLV_GTLC_WAKE_CTRL); > > + s->gtlc_survive = I915_READ(VLV_GTLC_SURVIVABILITY_REG); > > + s->pmwgicz = I915_READ(VLV_PMWGICZ); > > + > > + /* Gunit-Display CZ domain, 0x182028-0x1821CF */ > > + s->gu_ctl0 = I915_READ(VLV_GU_CTL0); > > + s->gu_ctl1 = I915_READ(VLV_GU_CTL1); > > + s->clock_gate_dis2 = I915_READ(VLV_GUNIT_CLOCK_GATE2); > > + > > + /* > > + * Not saving any of: > > + * DFT, 0x9800-0x9EC0 > > + * SARB, 0xB000-0xB1FC > > + * GAC, 0x5208-0x524C, 0x14000-0x14C000 > > + * PCI CFG > > + */ > > +} > > Ok somehow I've missed this, and it totally freaks me out ;-) I really > don't like large-scale register save/restore code since it tends to be > race and brittle. And a bunch of these (e.g. for the interrupt stuff) > should be handled already by simply running the interrupt code again. I don't like it either and that's why I put a TODO in the comment above to further reduce the list. Note that I already removed a bunch of the registers that the spec says we should save/restore. Those I found to be safe, but for all others there is no guarantee that we can do away with this code. It needs time to go through all this stuff, so I chose this solution leaving a way to optimize things further over time. > Most of the others probably just work if we run the ring init code again. > Thanks to Chris' work to not tear down ring structures (unfortunately > patches stuck in review limbo) that boils down to a simple call to the > ring init functions. 
Yes, I noticed that and once it gets merged and someone cross-checks it against this reglist we can remove some further registers. Until then, this code is at worst unneeded overhead; I don't see how it can cause any functional issues. > For the remaining ones we're either ok with the reset value, or we definitely > miss a w/a somewhere or it's placed at the wrong place (e.g. in the > clock_gating function instead of the right ring init function). Yes, those need to be checked and fixed up one-by-one, but again until that happens this code is at worst unneeded overhead. But in case the BIOS sets some register to a non-reset value it can save things. > Not sure yet what to do with this patch, but I'm voting for further > discussion. Imo all the others can already be pulled in ... I'd vote for inclusion with a promise from me that I reduce the reglist here over time. > Also I wonder whether hsw/bdw _really_ don't need any of this, or whether > it would be better to manually load the ring state into the hardware again > too. Another reason for more unified runtime pm code ;-) In this patchset I made the first step towards more unified PM code, could we continue from there on? 
:) --Imre > > Cheers, Daniel > > > + > > +static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv) > > +{ > > + struct vlv_s0ix_state *s = &dev_priv->vlv_s0ix_state; > > + u32 val; > > + int i; > > + > > + /* GAM 0x4000-0x4770 */ > > + I915_WRITE(GEN7_WR_WATERMARK, s->wr_watermark); > > + I915_WRITE(GEN7_GFX_PRIO_CTRL, s->gfx_prio_ctrl); > > + I915_WRITE(ARB_MODE, s->arb_mode | (0xffff << 16)); > > + I915_WRITE(GEN7_GFX_PEND_TLB0, s->gfx_pend_tlb0); > > + I915_WRITE(GEN7_GFX_PEND_TLB1, s->gfx_pend_tlb1); > > + > > + for (i = 0; i < ARRAY_SIZE(s->lra_limits); i++) > > + I915_WRITE(GEN7_LRA_LIMITS_BASE + i * 4, s->lra_limits[i]); > > + > > + I915_WRITE(GEN7_MEDIA_MAX_REQ_COUNT, s->media_max_req_count); > > + I915_WRITE(GEN7_MEDIA_MAX_REQ_COUNT, s->gfx_max_req_count); > > + > > + I915_WRITE(RENDER_HWS_PGA_GEN7, s->render_hwsp); > > + I915_WRITE(GAM_ECOCHK, s->ecochk); > > + I915_WRITE(BSD_HWS_PGA_GEN7, s->bsd_hwsp); > > + I915_WRITE(BLT_HWS_PGA_GEN7, s->blt_hwsp); > > + > > + I915_WRITE(GEN7_TLB_RD_ADDR, s->tlb_rd_addr); > > + > > + /* MBC 0x9024-0x91D0, 0x8500 */ > > + I915_WRITE(GEN7_G3DCTL, s->g3dctl); > > + I915_WRITE(GEN7_GSCKGCTL, s->gsckgctl); > > + I915_WRITE(GEN6_MBCTL, s->mbctl); > > + > > + /* GCP 0x9400-0x9424, 0x8100-0x810C */ > > + I915_WRITE(GEN6_UCGCTL1, s->ucgctl1); > > + I915_WRITE(GEN7_UCGCTL3, s->ucgctl3); > > + I915_WRITE(GEN7_RCGCTL1, s->rcgctl1); > > + I915_WRITE(GEN7_RCGCTL2, s->rcgctl2); > > + I915_WRITE(GEN7_RSTCTL, s->rstctl); > > + I915_WRITE(GEN7_MISCCPCTL, s->misccpctl); > > + > > + /* GPM 0xA000-0xAA84, 0x8000-0x80FC */ > > + I915_WRITE(GEN7_GFXPAUSE, s->gfxpause); > > + I915_WRITE(GEN7_RPDEUHWTC, s->rpdeuhwtc); > > + I915_WRITE(GEN7_RPDEUC, s->rpdeuc); > > + I915_WRITE(ECOBUS, s->ecobus); > > + I915_WRITE(VLV_PWRDWNUPCTL, s->pwrdwnupctl); > > + I915_WRITE(GEN6_RP_DOWN_TIMEOUT,s->rp_down_timeout); > > + I915_WRITE(GEN7_RPDEUCSW, s->rp_deucsw); > > + I915_WRITE(VLV_RCUBMABDTMR, s->rcubmabdtmr); > > + 
I915_WRITE(VLV_RCEDATA, s->rcedata); > > + I915_WRITE(VLV_SPAREG2H, s->spare2gh); > > + > > + /* Display CZ domain, 0x4400C-0x4402C, 0x4F000-0x4F11F */ > > + I915_WRITE(GTIMR, s->gt_imr); > > + I915_WRITE(GTIER, s->gt_ier); > > + I915_WRITE(GEN6_PMIIR, s->pm_imr); > > + I915_WRITE(GEN6_PMIER, s->pm_ier); > > + > > + for (i = 0; i < ARRAY_SIZE(s->gt_scratch); i++) > > + I915_WRITE(GEN7_GT_SCRATCH_BASE + i * 4, s->gt_scratch[i]); > > + > > + /* GT SA CZ domain, 0x100000-0x138124 */ > > + I915_WRITE(TILECTL, s->tilectl); > > + I915_WRITE(GTFIFOCTL, s->gt_fifoctl); > > + /* > > + * Preserve the GT allow wake and GFX force clock bit, they are not > > + * be restored, as they are used to control the s0ix suspend/resume > > + * sequence by the caller. > > + */ > > + val = I915_READ(VLV_GTLC_WAKE_CTRL); > > + val &= VLV_GTLC_ALLOWWAKEREQ; > > + val |= s->gtlc_wake_ctrl & ~VLV_GTLC_ALLOWWAKEREQ; > > + I915_WRITE(VLV_GTLC_WAKE_CTRL, val); > > + > > + val = I915_READ(VLV_GTLC_SURVIVABILITY_REG); > > + val &= VLV_GFX_CLK_FORCE_ON_BIT; > > + val |= s->gtlc_survive & ~VLV_GFX_CLK_FORCE_ON_BIT; > > + I915_WRITE(VLV_GTLC_SURVIVABILITY_REG, val); > > + > > + I915_WRITE(VLV_PMWGICZ, s->pmwgicz); > > + > > + /* Gunit-Display CZ domain, 0x182028-0x1821CF */ > > + I915_WRITE(VLV_GU_CTL0, s->gu_ctl0); > > + I915_WRITE(VLV_GU_CTL1, s->gu_ctl1); > > + I915_WRITE(VLV_GUNIT_CLOCK_GATE2, s->clock_gate_dis2); > > +} > > + > > int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on) > > { > > u32 val; > > @@ -948,6 +1140,137 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on) > > #undef COND > > } > > > > +static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow) > > +{ > > + u32 val; > > + int err = 0; > > + > > + val = I915_READ(VLV_GTLC_WAKE_CTRL); > > + val &= ~VLV_GTLC_ALLOWWAKEREQ; > > + if (allow) > > + val |= VLV_GTLC_ALLOWWAKEREQ; > > + I915_WRITE(VLV_GTLC_WAKE_CTRL, val); > > + POSTING_READ(VLV_GTLC_WAKE_CTRL); > > + > > 
+#define COND (!!(I915_READ(VLV_GTLC_PW_STATUS) & VLV_GTLC_ALLOWWAKEACK) == \ > > + allow) > > + err = wait_for(COND, 1); > > + if (err) > > + DRM_ERROR("timeout disabling GT waking\n"); > > + return err; > > +#undef COND > > +} > > + > > +static int vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv, > > + bool wait_for_on) > > +{ > > + u32 mask; > > + u32 val; > > + int err; > > + > > + mask = VLV_GTLC_PW_MEDIA_STATUS_MASK | VLV_GTLC_PW_RENDER_STATUS_MASK; > > + val = wait_for_on ? mask : 0; > > +#define COND ((I915_READ(VLV_GTLC_PW_STATUS) & mask) == val) > > + if (COND) > > + return 0; > > + > > + DRM_DEBUG_KMS("waiting for GT wells to go %s (%08x)\n", > > + wait_for_on ? "on" : "off", > > + I915_READ(VLV_GTLC_PW_STATUS)); > > + > > + /* > > + * RC6 transitioning can be delayed up to 2 msec (see > > + * valleyview_enable_rps), use 3 msec for safety. > > + */ > > + err = wait_for(COND, 3); > > + if (err) > > + DRM_ERROR("timeout waiting for GT wells to go %s\n", > > + wait_for_on ? "on" : "off"); > > + > > + return err; > > +#undef COND > > +} > > + > > +static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) > > +{ > > + if (!(I915_READ(VLV_GTLC_PW_STATUS) & VLV_GTLC_ALLOWWAKEERR)) > > + return; > > + > > + DRM_ERROR("GT register access while GT waking disabled\n"); > > + I915_WRITE(VLV_GTLC_PW_STATUS, VLV_GTLC_ALLOWWAKEERR); > > +} > > + > > +static int vlv_runtime_suspend(struct drm_i915_private *dev_priv) > > +{ > > + u32 mask; > > + int err; > > + > > + /* > > + * Bspec defines the following GT well on flags as debug only, so > > + * don't treat them as hard failures. 
> > + */ > > + (void)vlv_wait_for_gt_wells(dev_priv, false); > > + > > + mask = VLV_GTLC_RENDER_CTX_EXISTS | VLV_GTLC_MEDIA_CTX_EXISTS; > > + WARN_ON((I915_READ(VLV_GTLC_WAKE_CTRL) & mask) != mask); > > + > > + vlv_check_no_gt_access(dev_priv); > > + > > + err = vlv_force_gfx_clock(dev_priv, true); > > + if (err) > > + goto err1; > > + > > + err = vlv_allow_gt_wake(dev_priv, false); > > + if (err) > > + goto err2; > > + vlv_save_gunit_s0ix_state(dev_priv); > > + > > + err = vlv_force_gfx_clock(dev_priv, false); > > + if (err) > > + goto err2; > > + > > + return 0; > > + > > +err2: > > + /* For safety always re-enable waking and disable gfx clock forcing */ > > + vlv_allow_gt_wake(dev_priv, true); > > +err1: > > + vlv_force_gfx_clock(dev_priv, false); > > + > > + return err; > > +} > > + > > +static int vlv_runtime_resume(struct drm_i915_private *dev_priv) > > +{ > > + struct drm_device *dev = dev_priv->dev; > > + int err; > > + int ret; > > + > > + /* > > + * If any of the steps fail just try to continue, that's the best we > > + * can do at this point. Return the first error code (which will also > > + * leave RPM permanently disabled). 
> > + */ > > + ret = vlv_force_gfx_clock(dev_priv, true); > > + > > + vlv_restore_gunit_s0ix_state(dev_priv); > > + > > + err = vlv_allow_gt_wake(dev_priv, true); > > + if (!ret) > > + ret = err; > > + > > + err = vlv_force_gfx_clock(dev_priv, false); > > + if (!ret) > > + ret = err; > > + > > + vlv_check_no_gt_access(dev_priv); > > + > > + intel_init_clock_gating(dev); > > + i915_gem_restore_fences(dev); > > + > > + return ret; > > +} > > + > > static int intel_runtime_suspend(struct device *device) > > { > > struct pci_dev *pdev = to_pci_dev(device); > > @@ -970,6 +1293,8 @@ static int intel_runtime_suspend(struct device *device) > > ret = 0; > > } if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { > > ret = hsw_runtime_suspend(dev_priv); > > + } else if (IS_VALLEYVIEW(dev)) { > > + ret = vlv_runtime_suspend(dev_priv); > > } else { > > ret = -ENODEV; > > WARN_ON(1); > > @@ -1018,6 +1343,8 @@ static int intel_runtime_resume(struct device *device) > > ret = snb_runtime_resume(dev_priv); > > } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { > > ret = hsw_runtime_resume(dev_priv); > > + } else if (IS_VALLEYVIEW(dev)) { > > + ret = vlv_runtime_resume(dev_priv); > > } else { > > WARN_ON(1); > > ret = -ENODEV; > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > index 3cac434..77cb7fc 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -819,6 +819,67 @@ struct i915_suspend_saved_registers { > > u32 savePCH_PORT_HOTPLUG; > > }; > > > > +struct vlv_s0ix_state { > > + /* GAM */ > > + u32 wr_watermark; > > + u32 gfx_prio_ctrl; > > + u32 arb_mode; > > + u32 gfx_pend_tlb0; > > + u32 gfx_pend_tlb1; > > + u32 lra_limits[GEN7_LRA_LIMITS_REG_NUM]; > > + u32 media_max_req_count; > > + u32 gfx_max_req_count; > > + u32 render_hwsp; > > + u32 ecochk; > > + u32 bsd_hwsp; > > + u32 blt_hwsp; > > + u32 tlb_rd_addr; > > + > > + /* MBC */ > > + u32 g3dctl; > > + u32 gsckgctl; > > + u32 mbctl; > > + > > + /* GCP */ > 
> + u32 ucgctl1; > > + u32 ucgctl3; > > + u32 rcgctl1; > > + u32 rcgctl2; > > + u32 rstctl; > > + u32 misccpctl; > > + > > + /* GPM */ > > + u32 gfxpause; > > + u32 rpdeuhwtc; > > + u32 rpdeuc; > > + u32 ecobus; > > + u32 pwrdwnupctl; > > + u32 rp_down_timeout; > > + u32 rp_deucsw; > > + u32 rcubmabdtmr; > > + u32 rcedata; > > + u32 spare2gh; > > + > > + /* Display 1 CZ domain */ > > + u32 gt_imr; > > + u32 gt_ier; > > + u32 pm_imr; > > + u32 pm_ier; > > + u32 gt_scratch[GEN7_GT_SCRATCH_REG_NUM]; > > + > > + /* GT SA CZ domain */ > > + u32 tilectl; > > + u32 gt_fifoctl; > > + u32 gtlc_wake_ctrl; > > + u32 gtlc_survive; > > + u32 pmwgicz; > > + > > + /* Display 2 CZ domain */ > > + u32 gu_ctl0; > > + u32 gu_ctl1; > > + u32 clock_gate_dis2; > > +}; > > + > > struct intel_gen6_power_mgmt { > > /* work and pm_iir are protected by dev_priv->irq_lock */ > > struct work_struct work; > > @@ -1447,6 +1508,7 @@ struct drm_i915_private { > > > > u32 suspend_count; > > struct i915_suspend_saved_registers regfile; > > + struct vlv_s0ix_state vlv_s0ix_state; > > > > struct { > > /* > > -- > > 1.8.4 > > > > _______________________________________________ > > Intel-gfx mailing list > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx >
Attachment:
signature.asc
Description: This is a digitally signed message part
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx