On Thu, Jul 03, 2014 at 05:33:01PM -0400, Rodrigo Vivi wrote: > From: Deepak S <deepak.s@xxxxxxxxxxxxxxx> > > With RC6 enabled, BYT has an HW issue in determining the right > Gfx busyness. > WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide > on increasing/decreasing the freq. This logic will monitor the C0 > counters of the render/media power-wells over the EI period and take the > necessary action based on these values. > > v2: Refactor duplicate code. (Ville) > > v3: Reformat the comments. (Ville) > > v4: Enable required counters and remove unwanted code (Ville) > > v5: Added frequency change acceleration support and removed kernel-doc > style comments. (Ville) > > v6: Updated comment section and fixed w/a comment. (Ville) > > Signed-off-by: Deepak S <deepak.s@xxxxxxxxxxxxxxx> > Reviewed-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> Queued for -next, thanks for the patch. -Daniel > --- > drivers/gpu/drm/i915/i915_drv.h | 15 +++++ > drivers/gpu/drm/i915/i915_irq.c | 133 +++++++++++++++++++++++++++++++++++++++- > drivers/gpu/drm/i915/i915_reg.h | 11 ++++ > drivers/gpu/drm/i915/intel_pm.c | 12 +++- > 4 files changed, 167 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 1bf277e..db33a34 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -880,6 +880,12 @@ struct vlv_s0ix_state { > u32 clock_gate_dis2; > }; > > +struct intel_rps_ei_calc { > + u32 cz_ts_ei; > + u32 render_ei_c0; > + u32 media_ei_c0; > +}; > + > struct intel_gen6_power_mgmt { > /* work and pm_iir are protected by dev_priv->irq_lock */ > struct work_struct work; > @@ -904,6 +910,8 @@ struct intel_gen6_power_mgmt { > u8 rp1_freq; /* "less than" RP0 power/freqency */ > u8 rp0_freq; /* Non-overclocked max frequency. 
*/ > > + u32 ei_interrupt_count; > + > int last_adj; > enum { LOW_POWER, BETWEEN, HIGH_POWER } power; > > @@ -1504,6 +1512,13 @@ struct drm_i915_private { > /* gen6+ rps state */ > struct intel_gen6_power_mgmt rps; > > + /* rps wa up ei calculation */ > + struct intel_rps_ei_calc rps_up_ei; > + > + /* rps wa down ei calculation */ > + struct intel_rps_ei_calc rps_down_ei; > + > + > /* ilk-only ips/rps state. Everything in here is protected by the global > * mchdev_lock in intel_pm.c */ > struct intel_ilk_power_mgmt ips; > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 0217a41..7ae17af 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1225,6 +1225,131 @@ static void notify_ring(struct drm_device *dev, > i915_queue_hangcheck(dev); > } > > +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv, > + struct intel_rps_ei_calc *rps_ei) > +{ > + u32 cz_ts, cz_freq_khz; > + u32 render_count, media_count; > + u32 elapsed_render, elapsed_media, elapsed_time; > + u32 residency = 0; > + > + cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP); > + cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4); > + > + render_count = I915_READ(VLV_RENDER_C0_COUNT_REG); > + media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG); > + > + if (rps_ei->cz_ts_ei == 0) { > + rps_ei->cz_ts_ei = cz_ts; > + rps_ei->render_ei_c0 = render_count; > + rps_ei->media_ei_c0 = media_count; > + > + return dev_priv->rps.cur_freq; > + } > + > + elapsed_time = cz_ts - rps_ei->cz_ts_ei; > + rps_ei->cz_ts_ei = cz_ts; > + > + elapsed_render = render_count - rps_ei->render_ei_c0; > + rps_ei->render_ei_c0 = render_count; > + > + elapsed_media = media_count - rps_ei->media_ei_c0; > + rps_ei->media_ei_c0 = media_count; > + > + /* Convert all the counters into common unit of milli sec */ > + elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC; > + elapsed_render /= cz_freq_khz; > + elapsed_media /= cz_freq_khz; > + > + /* > + * 
Calculate overall C0 residency percentage > + * only if elapsed time is non-zero > + */ > + if (elapsed_time) { > + residency = > + ((max(elapsed_render, elapsed_media) * 100) > + / elapsed_time); > + } > + > + return residency; > +} > + > +/** > + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU > + * busy-ness calculated from C0 counters of render & media power wells > + * @dev_priv: DRM device private > + * > + */ > +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) > +{ > + u32 residency_C0_up = 0, residency_C0_down = 0; > + u8 new_delay, adj; > + > + dev_priv->rps.ei_interrupt_count++; > + > + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); > + > + > + if (dev_priv->rps_up_ei.cz_ts_ei == 0) { > + vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei); > + vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei); > + return dev_priv->rps.cur_freq; > + } > + > + > + /* > + * To down throttle, C0 residency should be less than down threshold > + * for continuous EI intervals. So calculate down EI counters > + * once in VLV_INT_COUNT_FOR_DOWN_EI > + */ > + if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) { > + > + dev_priv->rps.ei_interrupt_count = 0; > + > + residency_C0_down = vlv_c0_residency(dev_priv, > + &dev_priv->rps_down_ei); > + } else { > + residency_C0_up = vlv_c0_residency(dev_priv, > + &dev_priv->rps_up_ei); > + } > + > + new_delay = dev_priv->rps.cur_freq; > + > + adj = dev_priv->rps.last_adj; > + /* C0 residency is greater than UP threshold. Increase Frequency */ > + if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) { > + if (adj > 0) > + adj *= 2; > + else > + adj = 1; > + > + if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit) > + new_delay = dev_priv->rps.cur_freq + adj; > + > + /* > + * For better performance, jump directly > + * to RPe if we're below it. 
> + */ > + if (new_delay < dev_priv->rps.efficient_freq) > + new_delay = dev_priv->rps.efficient_freq; > + > + } else if (!dev_priv->rps.ei_interrupt_count && > + (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) { > + if (adj < 0) > + adj *= 2; > + else > + adj = -1; > + /* > + * This means, C0 residency is less than down threshold over > + * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq > + */ > + if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) > + new_delay = dev_priv->rps.cur_freq + adj; > + } > + > + return new_delay; > +} > + > static void gen6_pm_rps_work(struct work_struct *work) > { > struct drm_i915_private *dev_priv = > @@ -1273,6 +1398,8 @@ static void gen6_pm_rps_work(struct work_struct *work) > else > new_delay = dev_priv->rps.min_freq_softlimit; > adj = 0; > + } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { > + new_delay = vlv_calc_delay_from_C0_counters(dev_priv); > } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { > if (adj < 0) > adj *= 2; > @@ -4363,7 +4490,11 @@ void intel_irq_init(struct drm_device *dev) > INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); > > /* Let's track the enabled rps events */ > - dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; > + if (IS_VALLEYVIEW(dev)) > + /* WaGsvRC0ResidencyMethod:VLV */ > + dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; > + else > + dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; > > setup_timer(&dev_priv->gpu_error.hangcheck_timer, > i915_hangcheck_elapsed, > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 3488567..99413ae 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -525,6 +525,7 @@ enum punit_power_well { > #define PUNIT_REG_GPU_FREQ_STS 0xd8 > #define GENFREQSTATUS (1<<0) > #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ 0xdc > +#define PUNIT_REG_CZ_TIMESTAMP 0xce > > #define PUNIT_FUSE_BUS2 0xf6 /* bits 47:40 */ > #define PUNIT_FUSE_BUS1 0xf5 /* bits 55:48 */ > @@ -550,6 
+551,11 @@ enum punit_power_well { > #define FB_FMAX_VMIN_FREQ_LO_SHIFT 27 > #define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000 > > +#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000 > +#define VLV_RP_UP_EI_THRESHOLD 90 > +#define VLV_RP_DOWN_EI_THRESHOLD 70 > +#define VLV_INT_COUNT_FOR_DOWN_EI 5 > + > /* vlv2 north clock has */ > #define CCK_FUSE_REG 0x8 > #define CCK_FUSE_HPLL_FREQ_MASK 0x3 > @@ -5383,6 +5389,7 @@ enum punit_power_well { > #define VLV_GTLC_ALLOWWAKEERR (1 << 1) > #define VLV_GTLC_PW_MEDIA_STATUS_MASK (1 << 5) > #define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7) > +#define VLV_GTLC_SURVIVABILITY_REG 0x130098 > #define FORCEWAKE_MT 0xa188 /* multi-threaded */ > #define FORCEWAKE_KERNEL 0x1 > #define FORCEWAKE_USER 0x2 > @@ -5530,6 +5537,8 @@ enum punit_power_well { > #define GEN6_GT_GFX_RC6_LOCKED 0x138104 > #define VLV_COUNTER_CONTROL 0x138104 > #define VLV_COUNT_RANGE_HIGH (1<<15) > +#define VLV_MEDIA_RC0_COUNT_EN (1<<5) > +#define VLV_RENDER_RC0_COUNT_EN (1<<4) > #define VLV_MEDIA_RC6_COUNT_EN (1<<1) > #define VLV_RENDER_RC6_COUNT_EN (1<<0) > #define GEN6_GT_GFX_RC6 0x138108 > @@ -5538,6 +5547,8 @@ enum punit_power_well { > > #define GEN6_GT_GFX_RC6p 0x13810C > #define GEN6_GT_GFX_RC6pp 0x138110 > +#define VLV_RENDER_C0_COUNT_REG 0x138118 > +#define VLV_MEDIA_C0_COUNT_REG 0x13811C > > #define GEN6_PCODE_MAILBOX 0x138124 > #define GEN6_PCODE_READY (1<<31) > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 1e4611a..8ea96ff 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3235,8 +3235,11 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) > > vlv_force_gfx_clock(dev_priv, false); > > - I915_WRITE(GEN6_PMINTRMSK, > - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); > + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) > + I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events); > + else > + I915_WRITE(GEN6_PMINTRMSK, > + gen6_rps_pm_mask(dev_priv, 
dev_priv->rps.cur_freq)); > } > > void gen6_rps_idle(struct drm_i915_private *dev_priv) > @@ -4076,6 +4079,7 @@ static void valleyview_enable_rps(struct drm_device *dev) > I915_WRITE(GEN6_RP_DOWN_EI, 350000); > > I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); > + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240); > > I915_WRITE(GEN6_RP_CONTROL, > GEN6_RP_MEDIA_TURBO | > @@ -4096,9 +4100,11 @@ static void valleyview_enable_rps(struct drm_device *dev) > > /* allows RC6 residency counter to work */ > I915_WRITE(VLV_COUNTER_CONTROL, > - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | > + _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | > + VLV_RENDER_RC0_COUNT_EN | > VLV_MEDIA_RC6_COUNT_EN | > VLV_RENDER_RC6_COUNT_EN)); > + > if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) > rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; > > -- > 1.9.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx