Refactor the reclocking logic used by RPS on Ironlake to reuse the infrastructure developed for RPS on Sandybridge+, along with the waitboosting support for stalled clients and missed frames. Reported-by: dimon@xxxxxxx Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90137 Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Jesse Barnes <jesse@xxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 8 --- drivers/gpu/drm/i915/i915_irq.c | 83 +++++++++++------------ drivers/gpu/drm/i915/i915_sysfs.c | 10 +++ drivers/gpu/drm/i915/intel_display.c | 1 - drivers/gpu/drm/i915/intel_pm.c | 124 ++++++++++++++++++++++++----------- drivers/platform/x86/intel_ips.c | 14 +++- include/drm/i915_drm.h | 1 + 7 files changed, 149 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7a260da815ad..c35723ace814 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1088,12 +1088,6 @@ struct intel_gen6_power_mgmt { extern spinlock_t mchdev_lock; struct intel_ilk_power_mgmt { - u8 cur_delay; - u8 min_delay; - u8 max_delay; - u8 fmax; - u8 fstart; - u64 last_count1; unsigned long last_time1; unsigned long chipset_power; @@ -2533,7 +2527,6 @@ extern int i915_reset(struct drm_device *dev); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv); extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); -extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); @@ -3175,7 +3168,6 @@ extern void intel_modeset_setup_hw_state(struct drm_device *dev, bool force_restore); extern void i915_redisable_vga(struct drm_device *dev); extern void i915_redisable_vga_power_on(struct drm_device *dev); -extern bool ironlake_set_drps(struct drm_device *dev, u8 val); extern void intel_init_pch_refclk(struct drm_device *dev); extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 375633356ac0..234a6e004a4d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -952,45 +952,6 @@ static void i915_hotplug_work_func(struct work_struct *work) drm_kms_helper_hotplug_event(dev); } -static void ironlake_rps_change_irq_handler(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 busy_up, busy_down, max_avg, min_avg; - u8 new_delay; - - spin_lock(&mchdev_lock); - - I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS)); - - new_delay = dev_priv->ips.cur_delay; - - I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG); - busy_up = I915_READ(RCPREVBSYTUPAVG); - busy_down = I915_READ(RCPREVBSYTDNAVG); - max_avg = I915_READ(RCBMAXAVG); - min_avg = I915_READ(RCBMINAVG); - - /* Handle RCS change request from hw */ - if (busy_up > max_avg) { - if (dev_priv->ips.cur_delay != dev_priv->ips.max_delay) - new_delay = dev_priv->ips.cur_delay - 1; - if (new_delay < dev_priv->ips.max_delay) - new_delay = dev_priv->ips.max_delay; - } else if (busy_down < min_avg) { - if (dev_priv->ips.cur_delay != dev_priv->ips.min_delay) - new_delay = dev_priv->ips.cur_delay + 1; - if (new_delay > dev_priv->ips.min_delay) - new_delay = dev_priv->ips.min_delay; - } - - if (ironlake_set_drps(dev, new_delay)) - dev_priv->ips.cur_delay = new_delay; - - spin_unlock(&mchdev_lock); - - return; -} - static void notify_ring(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -1039,6 +1000,36 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) dev_priv->rps.up_ei = dev_priv->rps.down_ei; } +static u32 ilk_compute_pm_iir(struct drm_i915_private *dev_priv) +{ + u32 pm_iir; + + spin_lock(&mchdev_lock); + I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS)); + I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG); + +#define busy_up I915_READ(RCPREVBSYTUPAVG) +#define busy_down I915_READ(RCPREVBSYTDNAVG) +#define max_avg I915_READ(RCBMAXAVG) +#define min_avg I915_READ(RCBMINAVG) + + if (busy_up > max_avg) + pm_iir = GEN6_PM_RP_UP_THRESHOLD; + else if (busy_down < min_avg) + pm_iir = GEN6_PM_RP_DOWN_THRESHOLD; + else + pm_iir = 0; + +#undef busy_up +#undef busy_down +#undef max_avg +#undef min_avg + + spin_unlock(&mchdev_lock); + + return pm_iir; +} + static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) { struct intel_rps_ei now; @@ -1095,10 +1086,14 @@ static void gen6_pm_rps_work(struct work_struct *work) spin_unlock_irq(&dev_priv->irq_lock); return; } - pm_iir = dev_priv->rps.pm_iir; - dev_priv->rps.pm_iir = 0; - /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ - gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + if (IS_GEN5(dev_priv)) { + pm_iir = ilk_compute_pm_iir(dev_priv); + } else { + pm_iir = dev_priv->rps.pm_iir; + dev_priv->rps.pm_iir = 0; + /* Make sure not to corrupt PMIMR state used by ringbuffer */ + gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + } spin_unlock_irq(&dev_priv->irq_lock); /* Make sure we didn't queue anything we're not going to process. */ @@ -2045,7 +2040,7 @@ static void ilk_display_irq_handler(struct drm_device *dev, u32 de_iir) } if (IS_GEN5(dev) && de_iir & DE_PCU_EVENT) - ironlake_rps_change_irq_handler(dev); + queue_work(dev_priv->wq, &dev_priv->rps.work); } static void ivb_display_irq_handler(struct drm_device *dev, u32 de_iir) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index df5636093397..6f770e7f92db 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -522,6 +522,14 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr return snprintf(buf, PAGE_SIZE, "%d\n", val); } +static const struct attribute *gen5_attrs[] = { + &dev_attr_gt_cur_freq_mhz.attr, + &dev_attr_gt_max_freq_mhz.attr, + &dev_attr_gt_min_freq_mhz.attr, + &dev_attr_gt_RP0_freq_mhz.attr, + &dev_attr_gt_RPn_freq_mhz.attr, + NULL, +}; static const struct attribute *gen6_attrs[] = { &dev_attr_gt_act_freq_mhz.attr, &dev_attr_gt_cur_freq_mhz.attr, @@ -652,6 +660,8 @@ void i915_setup_sysfs(struct drm_device *dev) ret = sysfs_create_files(&dev->primary->kdev->kobj, vlv_attrs); else if (INTEL_INFO(dev)->gen >= 6) ret = sysfs_create_files(&dev->primary->kdev->kobj, gen6_attrs); + else if (INTEL_INFO(dev)->gen >= 5) + ret = sysfs_create_files(&dev->primary->kdev->kobj, gen5_attrs); if (ret) DRM_ERROR("RPS sysfs setup failed\n"); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f63f194141db..e228070031ed 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10081,7 +10081,6 @@ void intel_mark_busy(struct drm_device *dev) return; intel_runtime_pm_get(dev_priv); - i915_update_gfx_val(dev_priv); intel_rps_busy(dev_priv); dev_priv->mm.busy = true; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0dcc7bb47f71..0cc9e95f70d3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3727,21 +3727,29 @@ DEFINE_SPINLOCK(mchdev_lock); * mchdev_lock. */ static struct drm_i915_private *i915_mch_dev; -bool ironlake_set_drps(struct drm_device *dev, u8 val) +static bool __ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val) { - struct drm_i915_private *dev_priv = dev->dev_private; u16 rgvswctl; + if (WARN_ON(val < dev_priv->rps.min_freq)) + return false; + if (WARN_ON(val > dev_priv->rps.max_freq)) + return false; + assert_spin_locked(&mchdev_lock); - rgvswctl = I915_READ16(MEMSWCTL); - if (rgvswctl & MEMCTL_CMD_STS) { + if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) { DRM_DEBUG("gpu busy, RCS change rejected\n"); return false; /* still busy with another command */ } - rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | - (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; + dev_priv->rps.cur_freq = val; + val = dev_priv->rps.max_freq - val + dev_priv->rps.min_freq; + + rgvswctl = + (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | + MEMCTL_SFCAVM; I915_WRITE16(MEMSWCTL, rgvswctl); POSTING_READ16(MEMSWCTL); @@ -3751,6 +3759,13 @@ bool ironlake_set_drps(struct drm_device *dev, u8 val) return true; } +static void ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val) +{ + spin_lock_irq(&mchdev_lock); + __ironlake_set_rps(dev_priv, val); + spin_unlock_irq(&mchdev_lock); +} + static void ironlake_enable_drps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -3782,16 +3797,18 @@ static void ironlake_enable_drps(struct drm_device *dev) vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; - dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ - dev_priv->ips.fstart = fstart; - - dev_priv->ips.max_delay = fstart; - dev_priv->ips.min_delay = fmin; - dev_priv->ips.cur_delay = fstart; + dev_priv->rps.max_freq = fmin; + dev_priv->rps.min_freq = fmax; + dev_priv->rps.cur_freq = fmin - fstart; DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", fmax, fmin, fstart); + dev_priv->rps.max_freq_softlimit = dev_priv->rps.min_freq; + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + dev_priv->rps.efficient_freq = dev_priv->rps.cur_freq; + dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); /* @@ -3808,7 +3825,7 @@ static void ironlake_enable_drps(struct drm_device *dev) DRM_ERROR("stuck trying to change perf mode\n"); mdelay(1); - ironlake_set_drps(dev, fstart); + __ironlake_set_rps(dev_priv, dev_priv->rps.cur_freq); dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) + I915_READ(0x112e0); @@ -3836,7 +3853,7 @@ static void ironlake_disable_drps(struct drm_device *dev) I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); /* Go back to the starting frequency */ - ironlake_set_drps(dev, dev_priv->ips.fstart); + __ironlake_set_rps(dev_priv, dev_priv->rps.efficient_freq); mdelay(1); rgvswctl |= MEMCTL_CMD_STS; I915_WRITE(MEMSWCTL, rgvswctl); @@ -4086,29 +4103,37 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); } +static void i915_update_gfx_val(struct drm_i915_private *dev_priv); + void intel_rps_busy(struct drm_i915_private *dev_priv) { - if (INTEL_INFO(dev_priv)->gen < 6) + if (INTEL_INFO(dev_priv)->gen < 5) return; - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { - if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) - gen6_rps_reset_ei(dev_priv); - I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); + if (INTEL_INFO(dev_priv)->gen >= 6) { + mutex_lock(&dev_priv->rps.hw_lock); + if (dev_priv->rps.enabled) { + if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) + gen6_rps_reset_ei(dev_priv); + I915_WRITE(GEN6_PMINTRMSK, + gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); + } + mutex_unlock(&dev_priv->rps.hw_lock); } - mutex_unlock(&dev_priv->rps.hw_lock); + + i915_update_gfx_val(dev_priv); } void intel_rps_idle(struct drm_i915_private *dev_priv) { - if (INTEL_INFO(dev_priv)->gen < 6) + if (INTEL_INFO(dev_priv)->gen < 5) return; mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { - if (IS_VALLEYVIEW(dev_priv)) + if (IS_GEN5(dev_priv)) + ironlake_set_rps(dev_priv, dev_priv->rps.idle_freq); + else if (IS_VALLEYVIEW(dev_priv)) vlv_set_rps_idle(dev_priv); else gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); @@ -4119,6 +4144,8 @@ void intel_rps_idle(struct drm_i915_private *dev_priv) while (!list_empty(&dev_priv->rps.clients)) list_del_init(dev_priv->rps.clients.next); mutex_unlock(&dev_priv->rps.hw_lock); + + i915_update_gfx_val(dev_priv); } void intel_rps_boost(struct drm_i915_private *dev_priv, @@ -4153,7 +4180,9 @@ void intel_rps_boost(struct drm_i915_private *dev_priv, void intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { - if (IS_VALLEYVIEW(dev_priv)) + if (IS_GEN5(dev_priv)) + ironlake_set_rps(dev_priv, val); + else if (IS_VALLEYVIEW(dev_priv)) valleyview_set_rps(dev_priv, val); else if (INTEL_INFO(dev_priv)->gen > 6) gen6_set_rps(dev_priv, val); @@ -5366,11 +5395,9 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) dev_priv->ips.gfx_power = diff; } -void i915_update_gfx_val(struct drm_i915_private *dev_priv) +static void i915_update_gfx_val(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; - - if (INTEL_INFO(dev)->gen != 5) + if (INTEL_INFO(dev_priv)->gen != 5) return; spin_lock_irq(&mchdev_lock); @@ -5419,10 +5446,9 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; unsigned long val; - if (INTEL_INFO(dev)->gen != 5) + if (INTEL_INFO(dev_priv)->gen != 5) return 0; spin_lock_irq(&mchdev_lock); @@ -5479,8 +5505,8 @@ bool i915_gpu_raise(void) } dev_priv = i915_mch_dev; - if (dev_priv->ips.max_delay > dev_priv->ips.fmax) - dev_priv->ips.max_delay--; + if (dev_priv->rps.max_freq_softlimit < dev_priv->rps.max_freq) + dev_priv->rps.max_freq_softlimit++; out_unlock: spin_unlock_irq(&mchdev_lock); @@ -5507,8 +5533,8 @@ bool i915_gpu_lower(void) } dev_priv = i915_mch_dev; - if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) - dev_priv->ips.max_delay++; + if (dev_priv->rps.max_freq_softlimit > dev_priv->rps.min_freq) + dev_priv->rps.max_freq_softlimit--; out_unlock: spin_unlock_irq(&mchdev_lock); @@ -5562,9 +5588,10 @@ bool i915_gpu_turbo_disable(void) } dev_priv = i915_mch_dev; - dev_priv->ips.max_delay = dev_priv->ips.fstart; + dev_priv->rps.max_freq_softlimit = dev_priv->rps.min_freq; + dev_priv->rps.enabled = false; - if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) + if (!__ironlake_set_rps(dev_priv, dev_priv->rps.min_freq)) ret = false; out_unlock: @@ -5574,6 +5601,27 @@ out_unlock: } EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); +bool i915_gpu_turbo_enable(void) +{ + struct drm_i915_private *dev_priv; + bool ret = true; + + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) { + ret = false; + goto out_unlock; + } + dev_priv = i915_mch_dev; + + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + dev_priv->rps.enabled = true; + +out_unlock: + spin_unlock_irq(&mchdev_lock); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_turbo_enable); + /** * Tells the intel_ips driver that the i915 driver is now loaded, if * IPS got loaded first. @@ -6844,7 +6892,7 @@ void intel_queue_rps_boost_for_request(struct drm_device *dev, { struct request_boost *boost; - if (rq == NULL || INTEL_INFO(dev)->gen < 6) + if (rq == NULL || INTEL_INFO(dev)->gen < 5) return; if (i915_gem_request_completed(rq, true)) diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index e2065e06a3f3..16030fbbd611 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -338,6 +338,7 @@ struct ips_driver { bool (*gpu_lower)(void); bool (*gpu_busy)(void); bool (*gpu_turbo_disable)(void); + bool (*gpu_turbo_enable)(void); /* For restoration at unload */ u64 orig_turbo_limit; @@ -577,7 +578,11 @@ static void ips_enable_gpu_turbo(struct ips_driver *ips) { if (ips->__gpu_turbo_on) return; - ips->__gpu_turbo_on = true; + + if (!ips->gpu_turbo_enable()) + dev_err(&ips->dev->dev, "failed to enable graphics turbo\n"); + else + ips->__gpu_turbo_on = true; } /** @@ -1438,9 +1443,14 @@ static bool ips_get_i915_syms(struct ips_driver *ips) ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable); if (!ips->gpu_turbo_disable) goto out_put_busy; + ips->gpu_turbo_enable = symbol_get(i915_gpu_turbo_enable); + if (!ips->gpu_turbo_enable) + goto out_put_disable; return true; +out_put_disable: + symbol_put(i915_gpu_turbo_disable); out_put_busy: symbol_put(i915_gpu_busy); out_put_lower: @@ -1702,6 +1712,8 @@ static void ips_remove(struct pci_dev *dev) symbol_put(i915_gpu_busy); if (ips->gpu_turbo_disable) symbol_put(i915_gpu_turbo_disable); + if (ips->gpu_turbo_enable) + symbol_put(i915_gpu_turbo_enable); rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN); diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 595f85c392ac..406710c30658 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -35,6 +35,7 @@ extern bool i915_gpu_raise(void); extern bool i915_gpu_lower(void); extern bool i915_gpu_busy(void); extern bool i915_gpu_turbo_disable(void); +extern bool i915_gpu_turbo_enable(void); /* * The Bridge device's PCI config space has information about the -- 2.1.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx