On Mon, Aug 11, 2014 at 11:08:38AM -0700, Daisy Sun wrote:
> BDW supports GT C0 residency reporting in a constant time unit. The
> driver calculates GT utilization based on C0 residency and adjusts
> the RP frequency up/down accordingly. For offscreen workloads
> specifically, set the frequency to RP0.
>
> An offscreen task is not restricted by frame rate; it can be executed
> as soon as possible. Transcoding and serialized workloads between the
> CPU and GPU both need high GT performance, so RP0 is a good option in
> this case. RC6 will kick in to compensate for power consumption when
> the GT is not active.
>
> v2: Rebase on recent drm-intel-nightly
> v3: Add a flip timeout monitor; when no flip is detected within
> 100ms, set the frequency to RP0.

Ok, let's make this really clear: If you wire this into the flip
handling in any way, I will not merge your patch. The timer should be
fully independent and tie into the gpu busy/idle handling we already
have.
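Concretely, something like this untested sketch (bdw_sw_turbo_busy()
is a made-up name, nothing in the tree), armed from the rps paths
instead of from intel_crtc_page_flip():

    /* Hypothetical sketch only: re-arm the sw-turbo timer from the
     * existing rps busy/idle hooks rather than from the flip code. */
    static void bdw_sw_turbo_busy(struct drm_i915_private *dev_priv)
    {
            if (!dev_priv->rps.is_bdw_sw_turbo)
                    return;

            mod_timer(&dev_priv->rps.sw_turbo.flip_timer,
                      usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) + jiffies);
    }

called from the busy/idle paths (e.g. gen6_rps_boost()), so the
frequency policy has no dependency on the display side at all.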
Thanks, Daniel

>
> Signed-off-by: Daisy Sun <daisy.sun@xxxxxxxxx>
> [torourke: rebased on latest and resolved conflict]
> Signed-off-by: Tom O'Rourke <Tom.O'Rourke@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_drv.h      |  22 ++++
>  drivers/gpu/drm/i915/i915_irq.c      |  21 ++++
>  drivers/gpu/drm/i915/i915_reg.h      |   4 +
>  drivers/gpu/drm/i915/intel_display.c |   3 +
>  drivers/gpu/drm/i915/intel_pm.c      | 230 +++++++++++++++++++++++++++++------
>  5 files changed, 241 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ef38c3b..f1c4c5b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -915,6 +915,23 @@ struct intel_rps_ei {
>  	u32 media_c0;
>  };
>  
> +struct intel_rps_bdw_cal {
> +	u32 it_threshold_pct; /* interrupt, in percentage */
> +	u32 eval_interval; /* evaluation interval, in us */
> +	u32 last_ts;
> +	u32 last_c0;
> +	bool is_up;
> +};
> +
> +struct intel_rps_bdw_turbo {
> +	struct intel_rps_bdw_cal up;
> +	struct intel_rps_bdw_cal down;
> +	struct timer_list flip_timer;
> +	u32 timeout;
> +	atomic_t flip_received;
> +	struct work_struct work_max_freq;
> +};
> +
>  struct intel_gen6_power_mgmt {
>  	/* work and pm_iir are protected by dev_priv->irq_lock */
>  	struct work_struct work;
> @@ -948,6 +965,9 @@ struct intel_gen6_power_mgmt {
>  	bool enabled;
>  	struct delayed_work delayed_resume_work;
>  
> +	bool is_bdw_sw_turbo; /* Switch of BDW software turbo */
> +	struct intel_rps_bdw_turbo sw_turbo; /* Calculate RP interrupt timing */
> +
>  	/* manual wa residency calculations */
>  	struct intel_rps_ei up_ei, down_ei;
>  
> @@ -2703,6 +2723,8 @@ extern void intel_disable_fbc(struct drm_device *dev);
>  extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
>  extern void intel_init_pch_refclk(struct drm_device *dev);
>  extern void gen6_set_rps(struct drm_device *dev, u8 val);
> +extern void bdw_software_turbo(struct drm_device *dev);
> +extern void gen8_flip_interrupt(struct drm_device *dev);
>  extern void valleyview_set_rps(struct drm_device *dev, u8 val);
>  extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
>  				  bool enable);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 6ef9d6f..367f8e1 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1961,6 +1961,27 @@ static void i9xx_pipe_crc_irq_handler(struct drm_device *dev, enum pipe pipe)
>  			  res1, res2);
>  }
>  
> +void gen8_flip_interrupt(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	if (!dev_priv->rps.is_bdw_sw_turbo)
> +		return;
> +
> +	if (atomic_read(&dev_priv->rps.sw_turbo.flip_received)) {
> +		mod_timer(&dev_priv->rps.sw_turbo.flip_timer,
> +			  usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) + jiffies);
> +	} else {
> +		dev_priv->rps.sw_turbo.flip_timer.expires =
> +			usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) + jiffies;
> +		add_timer(&dev_priv->rps.sw_turbo.flip_timer);
> +		atomic_set(&dev_priv->rps.sw_turbo.flip_received, true);
> +	}
> +
> +	bdw_software_turbo(dev);
> +}
> +
>  /* The RPS events need forcewake, so we add them to a work queue and mask their
>   * IMR bits until the work is done. Other interrupts can be processed without
>   * the work queue. */
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index fe5c276..088e0e1 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -5453,6 +5453,10 @@ enum punit_power_well {
>  #define GEN8_UCGCTL6				0x9430
>  #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE	(1<<14)
>  
> +#define TIMESTAMP_CTR				0x44070
> +#define FREQ_1_28_US(us)			(((us) * 100) >> 7)
> +#define MCHBAR_PCU_C0				(MCHBAR_MIRROR_BASE_SNB + 0x5960)
> +
>  #define GEN6_GFXPAUSE				0xA000
>  #define GEN6_RPNSWREQ				0xA008
>  #define   GEN6_TURBO_DISABLE			(1<<31)
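Side note on the new macro: FREQ_1_28_US() converts microseconds into
the 1.28 us units the hardware counts in, via us / 1.28 = us * 100 / 128
= (us * 100) >> 7. Checking the arithmetic (mine, not from the patch):

    FREQ_1_28_US(1000000) = 100000000 >> 7 = 781250  /* 1s, the old GEN6_RP_DOWN_TIMEOUT value */
    FREQ_1_28_US(84480)   =   8448000 >> 7 =  66000  /* the old GEN6_RP_UP_EI value */
    FREQ_1_28_US(448000)  =  44800000 >> 7 = 350000  /* the old GEN6_RP_DOWN_EI value */

so it exactly reproduces the magic numbers it replaces in intel_pm.c
below.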
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 99eb7ca..1dd8a7c 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9661,6 +9661,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	unsigned long flags;
>  	int ret;
>  
> +	/* trigger software GT busyness calculation */
> +	gen8_flip_interrupt(dev);
> +
>  	/*
>  	 * drm_mode_page_flip_ioctl() should already catch this, but double
>  	 * check to be safe. In the future we may enable pageflipping from
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 3f88f29..e13d0ff 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2122,7 +2122,6 @@ int ilk_wm_max_level(const struct drm_device *dev)
>  	else
>  		return 2;
>  }
> -
>  static void intel_print_wm_latency(struct drm_device *dev,
>  				   const char *name,
>  				   const uint16_t wm[5])
> @@ -3091,6 +3090,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>  {
>  	int new_power;
>  
> +	if (dev_priv->rps.is_bdw_sw_turbo)
> +		return;
> +
>  	new_power = dev_priv->rps.power;
>  	switch (dev_priv->rps.power) {
>  	case LOW_POWER:
> @@ -3298,8 +3300,11 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
>  			valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
>  		else if (IS_VALLEYVIEW(dev))
>  			vlv_set_rps_idle(dev_priv);
> -		else
> +		else if (!dev_priv->rps.is_bdw_sw_turbo
> +			 || atomic_read(&dev_priv->rps.sw_turbo.flip_received)) {
>  			gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> +		}
> +
>  		dev_priv->rps.last_adj = 0;
>  	}
>  	mutex_unlock(&dev_priv->rps.hw_lock);
> @@ -3313,8 +3318,11 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
>  	if (dev_priv->rps.enabled) {
>  		if (IS_VALLEYVIEW(dev))
>  			valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
> -		else
> +		else if (!dev_priv->rps.is_bdw_sw_turbo
> +			 || atomic_read(&dev_priv->rps.sw_turbo.flip_received)) {
>  			gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
> +		}
> +
>  		dev_priv->rps.last_adj = 0;
>  	}
>  	mutex_unlock(&dev_priv->rps.hw_lock);
> @@ -3345,21 +3353,26 @@ void valleyview_set_rps(struct drm_device *dev, u8 val)
>  static void gen8_disable_rps_interrupts(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	if (IS_BROADWELL(dev) && dev_priv->rps.is_bdw_sw_turbo) {
> +		if (atomic_read(&dev_priv->rps.sw_turbo.flip_received))
> +			del_timer(&dev_priv->rps.sw_turbo.flip_timer);
> +		dev_priv->rps.is_bdw_sw_turbo = false;
> +	} else {
> +		I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP);
> +		I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) &
> +			   ~dev_priv->pm_rps_events);
> +		/* Complete PM interrupt masking here doesn't race with the rps work
> +		 * item again unmasking PM interrupts because that is using a different
> +		 * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in
> +		 * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which
> +		 * gen8_enable_rps will clean up. */
>  
> -	I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP);
> -	I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) &
> -		   ~dev_priv->pm_rps_events);
> -	/* Complete PM interrupt masking here doesn't race with the rps work
> -	 * item again unmasking PM interrupts because that is using a different
> -	 * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in
> -	 * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which
> -	 * gen8_enable_rps will clean up. */
> -
> -	spin_lock_irq(&dev_priv->irq_lock);
> -	dev_priv->rps.pm_iir = 0;
> -	spin_unlock_irq(&dev_priv->irq_lock);
> +		spin_lock_irq(&dev_priv->irq_lock);
> +		dev_priv->rps.pm_iir = 0;
> +		spin_unlock_irq(&dev_priv->irq_lock);
>  
> -	I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
> +		I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
> +	}
>  }
>  
>  static void gen6_disable_rps_interrupts(struct drm_device *dev)
> @@ -3511,13 +3524,111 @@ static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 rp_state_c
>  	dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
>  }
>  
> +static void bdw_sw_calculate_freq(struct drm_device *dev,
> +		struct intel_rps_bdw_cal *c, u32 *cur_time, u32 *c0)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	u64 busy = 0;
> +	u32 busyness_pct = 0;
> +	u32 elapsed_time = 0;
> +	u16 new_freq = 0;
> +
> +	if (!c || !cur_time || !c0)
> +		return;
> +
> +	if (0 == c->last_c0)
> +		goto out;
> +
> +	/* Check evaluation interval */
> +	elapsed_time = *cur_time - c->last_ts;
> +	if (elapsed_time < c->eval_interval)
> +		return;
> +
> +	mutex_lock(&dev_priv->rps.hw_lock);
> +
> +	/*
> +	 * The c0 unit is 32*1.28 usec, the elapsed_time unit is 1 usec.
> +	 * The full busyness_pct calculation would be
> +	 *	busy = ((u64)(*c0 - c->last_c0) << 5 << 7) / 100;
> +	 *	busyness_pct = (u32)(busy * 100 / elapsed_time);
> +	 * The formula below simplifies the CPU calculation.
> +	 */
> +	busy = (u64)(*c0 - c->last_c0) << 12;
> +	do_div(busy, elapsed_time);
> +	busyness_pct = (u32)busy;
> +
> +	if (c->is_up && busyness_pct >= c->it_threshold_pct)
> +		new_freq = (u16)dev_priv->rps.cur_freq + 3;
> +	if (!c->is_up && busyness_pct <= c->it_threshold_pct)
> +		new_freq = (u16)dev_priv->rps.cur_freq - 1;
> +
> +	/* Clamp the new frequency to the softlimits and apply it */
> +	if (0 != new_freq) {
> +		if (new_freq > dev_priv->rps.max_freq_softlimit)
> +			new_freq = dev_priv->rps.max_freq_softlimit;
> +		else if (new_freq < dev_priv->rps.min_freq_softlimit)
> +			new_freq = dev_priv->rps.min_freq_softlimit;
> +
> +		gen6_set_rps(dev, new_freq);
> +	}
> +
> +	mutex_unlock(&dev_priv->rps.hw_lock);
> +
> +out:
> +	c->last_c0 = *c0;
> +	c->last_ts = *cur_time;
> +}
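The shortcut in the busyness computation checks out (my derivation,
for reviewers following along): one C0 count is 32 * 1.28 us =
32 * 128/100 us, so

    busyness_pct = delta_c0 * 32 * (128 / 100) * (100 / elapsed_time)
                 = delta_c0 * 32 * 128 / elapsed_time
                 = (delta_c0 << 12) / elapsed_time

i.e. the /100 from the 1.28 us unit conversion cancels against the
*100 for the percentage, leaving the single shift plus do_div() above.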
> +
> +static void gen8_set_frequency_RP0(struct work_struct *work)
> +{
> +	struct intel_rps_bdw_turbo *p_bdw_turbo =
> +		container_of(work, struct intel_rps_bdw_turbo, work_max_freq);
> +	struct intel_gen6_power_mgmt *p_power_mgmt =
> +		container_of(p_bdw_turbo, struct intel_gen6_power_mgmt, sw_turbo);
> +	struct drm_i915_private *dev_priv =
> +		container_of(p_power_mgmt, struct drm_i915_private, rps);
> +
> +	mutex_lock(&dev_priv->rps.hw_lock);
> +	gen6_set_rps(dev_priv->dev, dev_priv->rps.rp0_freq);
> +	mutex_unlock(&dev_priv->rps.hw_lock);
> +}
> +
> +static void flip_active_timeout_handler(unsigned long var)
> +{
> +	struct drm_i915_private *dev_priv = (struct drm_i915_private *) var;
> +
> +	del_timer(&dev_priv->rps.sw_turbo.flip_timer);
> +	atomic_set(&dev_priv->rps.sw_turbo.flip_received, false);
> +
> +	queue_work(dev_priv->wq, &dev_priv->rps.sw_turbo.work_max_freq);
> +}
> +
> +void bdw_software_turbo(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	u32 current_time = I915_READ(TIMESTAMP_CTR); /* unit in usec */
> +	u32 current_c0 = I915_READ(MCHBAR_PCU_C0); /* unit in 32*1.28 usec */
> +
> +	bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.up,
> +			&current_time, &current_c0);
> +	bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.down,
> +			&current_time, &current_c0);
> +}
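To spell out the flow this patch proposes (my paraphrase, not code
from the patch):

    flip submitted -> gen8_flip_interrupt(): re-arm the 200ms timer,
        sample TIMESTAMP_CTR/MCHBAR_PCU_C0 and step the frequency
        against the 90%/70% busyness thresholds
    200ms with no flip -> flip_active_timeout_handler(): clear
        flip_received and queue gen8_set_frequency_RP0(), pinning the
        GPU at RP0 for the offscreen/transcode case
    next flip -> add_timer() again and resume sampling

Note the policy only ever advances when the display side submits
flips, which is the coupling objected to above.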
> +
>  static void gen8_enable_rps(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring;
>  	uint32_t rc6_mask = 0, rp_state_cap;
> +	uint32_t threshold_up_pct, threshold_down_pct;
> +	uint32_t ei_up, ei_down; /* up and down evaluation interval */
> +	u32 rp_ctl_flag;
>  	int unused;
>  
> +	/* Use software Turbo for BDW */
> +	dev_priv->rps.is_bdw_sw_turbo = IS_BROADWELL(dev);
> +
>  	/* 1a: Software RC state - RC0 */
>  	I915_WRITE(GEN6_RC_STATE, 0);
>  
> @@ -3561,35 +3672,74 @@ static void gen8_enable_rps(struct drm_device *dev)
>  		   HSW_FREQUENCY(dev_priv->rps.rp1_freq));
>  	I915_WRITE(GEN6_RC_VIDEO_FREQ,
>  		   HSW_FREQUENCY(dev_priv->rps.rp1_freq));
> -	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
> +	ei_up = 84480; /* 84.48ms */
> +	ei_down = 448000;
> +	threshold_up_pct = 90; /* x percent busy */
> +	threshold_down_pct = 70;
> +
> +	if (dev_priv->rps.is_bdw_sw_turbo) {
> +		dev_priv->rps.sw_turbo.up.it_threshold_pct = threshold_up_pct;
> +		dev_priv->rps.sw_turbo.up.eval_interval = ei_up;
> +		dev_priv->rps.sw_turbo.up.is_up = true;
> +		dev_priv->rps.sw_turbo.up.last_ts = 0;
> +		dev_priv->rps.sw_turbo.up.last_c0 = 0;
> +
> +		dev_priv->rps.sw_turbo.down.it_threshold_pct = threshold_down_pct;
> +		dev_priv->rps.sw_turbo.down.eval_interval = ei_down;
> +		dev_priv->rps.sw_turbo.down.is_up = false;
> +		dev_priv->rps.sw_turbo.down.last_ts = 0;
> +		dev_priv->rps.sw_turbo.down.last_c0 = 0;
> +
> +		/* Start the timer to track if flip comes */
> +		dev_priv->rps.sw_turbo.timeout = 200*1000; /* in us */
> +
> +		init_timer(&dev_priv->rps.sw_turbo.flip_timer);
> +		dev_priv->rps.sw_turbo.flip_timer.function = flip_active_timeout_handler;
> +		dev_priv->rps.sw_turbo.flip_timer.data = (unsigned long) dev_priv;
> +		dev_priv->rps.sw_turbo.flip_timer.expires =
> +			usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) + jiffies;
> +		add_timer(&dev_priv->rps.sw_turbo.flip_timer);
> +		INIT_WORK(&dev_priv->rps.sw_turbo.work_max_freq, gen8_set_frequency_RP0);
> +
> +		atomic_set(&dev_priv->rps.sw_turbo.flip_received, true);
> +	} else {
> +		/* NB: Docs say 1s, and 1000000 - which aren't equivalent
> +		 * 1 second timeout */
> +		I915_WRITE(GEN6_RP_DOWN_TIMEOUT, FREQ_1_28_US(1000000));
>  
> -	/* Docs recommend 900MHz, and 300 MHz respectively */
> -	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
> -		   dev_priv->rps.max_freq_softlimit << 24 |
> -		   dev_priv->rps.min_freq_softlimit << 16);
> +		/* Docs recommend 900MHz, and 300 MHz respectively */
> +		I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
> +			   dev_priv->rps.max_freq_softlimit << 24 |
> +			   dev_priv->rps.min_freq_softlimit << 16);
>  
> -	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
> -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
> -	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
> -	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
> +		I915_WRITE(GEN6_RP_UP_THRESHOLD,
> +			   FREQ_1_28_US(ei_up * threshold_up_pct / 100));
> +		I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
> +			   FREQ_1_28_US(ei_down * threshold_down_pct / 100));
> +		I915_WRITE(GEN6_RP_UP_EI,
> +			   FREQ_1_28_US(ei_up));
> +		I915_WRITE(GEN6_RP_DOWN_EI,
> +			   FREQ_1_28_US(ei_down));
>  
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +		I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +	}
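One more arithmetic cross-check on the else branch (mine, not from
the patch): with ei_up = 84480 and threshold_up_pct = 90,
FREQ_1_28_US(84480 * 90 / 100) = FREQ_1_28_US(76032) = 59400, within
rounding of the old 7600000 / 128 = 59375 (similarly for the down
threshold), and FREQ_1_28_US(84480) = 66000 and FREQ_1_28_US(448000)
= 350000 match the old EI values exactly. So the hardware-turbo path
here reads as a cleanup of the existing magic numbers rather than a
behavior change.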
>  
>  	/* 5: Enable RPS */
> -	I915_WRITE(GEN6_RP_CONTROL,
> -		   GEN6_RP_MEDIA_TURBO |
> -		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> -		   GEN6_RP_MEDIA_IS_GFX |
> -		   GEN6_RP_ENABLE |
> -		   GEN6_RP_UP_BUSY_AVG |
> -		   GEN6_RP_DOWN_IDLE_AVG);
> -
> -	/* 6: Ring frequency + overclocking (our driver does this later */
> -
> +	rp_ctl_flag = GEN6_RP_MEDIA_TURBO |
> +		      GEN6_RP_MEDIA_HW_NORMAL_MODE |
> +		      GEN6_RP_MEDIA_IS_GFX |
> +		      GEN6_RP_UP_BUSY_AVG |
> +		      GEN6_RP_DOWN_IDLE_AVG;
> +	if (!dev_priv->rps.is_bdw_sw_turbo)
> +		rp_ctl_flag |= GEN6_RP_ENABLE;
> +
> +	I915_WRITE(GEN6_RP_CONTROL, rp_ctl_flag);
> +
> +	/* 6: Ring frequency + overclocking
> +	 * (our driver does this later) */
>  	gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
> -
> -	gen8_enable_rps_interrupts(dev);
> +	if (!dev_priv->rps.is_bdw_sw_turbo)
> +		gen8_enable_rps_interrupts(dev);
>  
>  	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>  }
> @@ -5018,6 +5168,8 @@ static void intel_gen6_powersave_work(struct work_struct *work)
>  			     rps.delayed_resume_work.work);
>  	struct drm_device *dev = dev_priv->dev;
>  
> +	dev_priv->rps.is_bdw_sw_turbo = false;
> +
>  	mutex_lock(&dev_priv->rps.hw_lock);
>  
>  	if (IS_CHERRYVIEW(dev)) {
> --
> 1.9.1

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx