Refactor the reclocking logic used by RPS on Ironlake to reuse the
infrastructure developed for RPS on Sandybridge+, along with the
waitboosting support for stalled clients and missed frames.

Reported-by: dimon@xxxxxxx
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90137
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   5 +-
 drivers/gpu/drm/i915/i915_drv.h     |  19 +-
 drivers/gpu/drm/i915/i915_irq.c     |  58 +---
 drivers/gpu/drm/i915/i915_request.c |   1 -
 drivers/gpu/drm/i915/i915_sysfs.c   |  10 +
 drivers/gpu/drm/i915/intel_gt_pm.c  | 575 ++++++++++++++++++++----------------
 drivers/gpu/drm/i915/intel_pm.c     |  10 -
 drivers/platform/x86/intel_ips.c    |  14 +-
 include/drm/i915_drm.h              |   1 +
 9 files changed, 357 insertions(+), 336 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ccb01244e616..7c7afdac8c8c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1421,6 +1421,7 @@ static int ironlake_drpc_info(struct seq_file *m)
 		   yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
 	seq_printf(m, "SW control enabled: %s\n",
 		   yesno(rgvmodectl & MEMMODE_SWMODE_EN));
+	seq_printf(m, "RPS active? 
%s\n", yesno(dev_priv->gt.awake)); seq_printf(m, "Gated voltage change: %s\n", yesno(rgvmodectl & MEMMODE_RCLK_GATE)); seq_printf(m, "Starting frequency: P%d\n", @@ -2201,10 +2202,12 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) intel_gpu_freq(dev_priv, rps->freq), intel_gpu_freq(dev_priv, rps->min), intel_gpu_freq(dev_priv, rps->max)); - seq_printf(m, " min hard:%d, user:%d; max user:%d, hard:%d\n", + seq_printf(m, " min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n", intel_gpu_freq(dev_priv, rps->min_freq_hw), + intel_gpu_freq(dev_priv, rps->min_freq_soft), intel_gpu_freq(dev_priv, rps->min_freq_user), intel_gpu_freq(dev_priv, rps->max_freq_user), + intel_gpu_freq(dev_priv, rps->max_freq_soft), intel_gpu_freq(dev_priv, rps->max_freq_hw)); seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", intel_gpu_freq(dev_priv, rps->idle_freq), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cfbcaa8556e0..82e9a58bd65f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -761,6 +761,8 @@ struct intel_rps { u8 max_freq_hw; /* Maximum frequency, RP0 if not overclocking */ u8 min_freq_user; /* Minimum frequency permitted by the driver */ u8 max_freq_user; /* Max frequency permitted by the driver */ + u8 min_freq_soft; + u8 max_freq_soft; u8 idle_freq; /* Frequency to request when we are idle */ u8 efficient_freq; /* AKA RPe. 
Pre-determined balanced frequency */ @@ -788,18 +790,14 @@ struct intel_rps { extern spinlock_t mchdev_lock; struct intel_ips { - u8 cur_delay; - u8 min_delay; - u8 max_delay; - u8 fmax; - u8 fstart; - - u64 last_count1; - unsigned long last_time1; unsigned long chipset_power; - u64 last_count2; - u64 last_time2; unsigned long gfx_power; + + ktime_t last_time1; + ktime_t last_time2; + + u64 last_count1; + u32 last_count2; u8 corr; int c_m; @@ -2698,7 +2696,6 @@ extern void intel_hangcheck_init(struct drm_i915_private *dev_priv); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv); extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); -extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); int intel_engines_init_mmio(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 9a52692395f2..facaae27a969 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -850,45 +850,6 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) return position; } -static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) -{ - struct intel_ips *ips = &dev_priv->gt_pm.ips; - u32 busy_up, busy_down, max_avg, min_avg; - u8 new_delay; - - spin_lock(&mchdev_lock); - - I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS)); - - new_delay = ips->cur_delay; - - I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG); - busy_up = I915_READ(RCPREVBSYTUPAVG); - busy_down = I915_READ(RCPREVBSYTDNAVG); - max_avg = I915_READ(RCBMAXAVG); - min_avg = I915_READ(RCBMINAVG); - - /* Handle RCS change request from hw */ - if (busy_up > max_avg) { - if (ips->cur_delay != ips->max_delay) - new_delay = ips->cur_delay - 1; - if (new_delay < ips->max_delay) - new_delay = ips->max_delay; - } else if (busy_down < min_avg) { - if 
(ips->cur_delay != ips->min_delay) - new_delay = ips->cur_delay + 1; - if (new_delay > ips->min_delay) - new_delay = ips->min_delay; - } - - if (ironlake_set_drps(dev_priv, new_delay)) - ips->cur_delay = new_delay; - - spin_unlock(&mchdev_lock); - - return; -} - static void notify_ring(struct intel_engine_cs *engine) { struct i915_request *rq = NULL; @@ -2047,8 +2008,12 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, I915_WRITE(SDEIIR, pch_iir); } - if (IS_GEN5(dev_priv) && de_iir & DE_PCU_EVENT) - ironlake_rps_change_irq_handler(dev_priv); + if (IS_GEN5(dev_priv) && de_iir & DE_PCU_EVENT) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + rps->pm_iir = GEN6_PM_RP_DOWN_EI_EXPIRED; + schedule_work(&rps->work); + } } static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, @@ -3335,17 +3300,6 @@ static int ironlake_irq_postinstall(struct drm_device *dev) ibx_irq_postinstall(dev); - if (IS_IRONLAKE_M(dev_priv)) { - /* Enable PCU event interrupts - * - * spinlocking not required here for correctness since interrupt - * setup is guaranteed to run in single-threaded context. But we - * need it to make the assert_spin_locked happy. 
*/ - spin_lock_irq(&dev_priv->irq_lock); - ilk_enable_display_irq(dev_priv, DE_PCU_EVENT); - spin_unlock_irq(&dev_priv->irq_lock); - } - return 0; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 605770191ceb..5dbb1905f28a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -275,7 +275,6 @@ static void mark_busy(struct drm_i915_private *i915) i915->gt.epoch = 1; intel_gt_pm_busy(i915); - i915_update_gfx_val(i915); i915_pmu_gt_unparked(i915); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 2d4c7f2e0878..063cd00d2aae 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -457,6 +457,14 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr return snprintf(buf, PAGE_SIZE, "%d\n", val); } +static const struct attribute *gen5_attrs[] = { + &dev_attr_gt_cur_freq_mhz.attr, + &dev_attr_gt_max_freq_mhz.attr, + &dev_attr_gt_min_freq_mhz.attr, + &dev_attr_gt_RP0_freq_mhz.attr, + &dev_attr_gt_RPn_freq_mhz.attr, + NULL, +}; static const struct attribute *gen6_attrs[] = { &dev_attr_gt_act_freq_mhz.attr, &dev_attr_gt_cur_freq_mhz.attr, @@ -593,6 +601,8 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv) ret = sysfs_create_files(&kdev->kobj, vlv_attrs); else if (INTEL_GEN(dev_priv) >= 6) ret = sysfs_create_files(&kdev->kobj, gen6_attrs); + else if (INTEL_GEN(dev_priv) >= 5) + ret = sysfs_create_files(&kdev->kobj, gen5_attrs); if (ret) DRM_ERROR("RPS sysfs setup failed\n"); diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c index def292cfd181..6f5c14421c90 100644 --- a/drivers/gpu/drm/i915/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/intel_gt_pm.c @@ -34,44 +34,62 @@ * which brings the most power savings; deeper states save more power, but * require higher latency to switch to and wake up. 
*/ +static void gen5_update_gfx_val(struct drm_i915_private *dev_priv); /* * Lock protecting IPS related data structures */ DEFINE_SPINLOCK(mchdev_lock); -bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) +static int __ironlake_wait_for_rps(struct drm_i915_private *dev_priv) { + return wait_for_atomic((I915_READ16(MEMSWCTL) & MEMCTL_CMD_STS) == 0, + 10) == 0; +} + +static int __ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; u16 rgvswctl; lockdep_assert_held(&mchdev_lock); - rgvswctl = I915_READ16(MEMSWCTL); - if (rgvswctl & MEMCTL_CMD_STS) { - DRM_DEBUG("gpu busy, RCS change rejected\n"); - return false; /* still busy with another command */ + if (!__ironlake_wait_for_rps(dev_priv)) { + DRM_DEBUG_DRIVER("gpu busy, RCS change rejected\n"); + return -EAGAIN; /* still busy with another command */ } - rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | - (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; + val = rps->max_freq_hw - val + rps->min_freq_hw; + + rgvswctl = + (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | + MEMCTL_SFCAVM; I915_WRITE16(MEMSWCTL, rgvswctl); POSTING_READ16(MEMSWCTL); rgvswctl |= MEMCTL_CMD_STS; I915_WRITE16(MEMSWCTL, rgvswctl); - return true; + return 0; +} + +static int ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val) +{ + if (val != dev_priv->gt_pm.rps.freq) { + spin_lock_irq(&mchdev_lock); + __ironlake_set_rps(dev_priv, val); + spin_unlock_irq(&mchdev_lock); + } + + return 0; } static void ironlake_enable_drps(struct drm_i915_private *dev_priv) { struct intel_ips *ips = &dev_priv->gt_pm.ips; - u32 rgvmodectl; - u8 fmax, fmin, fstart, vstart; - spin_lock_irq(&mchdev_lock); - - rgvmodectl = I915_READ(MEMMODECTL); + spin_lock(&mchdev_lock); /* Enable temp reporting */ I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); @@ -87,75 +105,67 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv) I915_WRITE(MEMIHYST, 1); - 
/* Set up min, max, and cur for interrupt handling */ - fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; - fmin = (rgvmodectl & MEMMODE_FMIN_MASK); - fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> - MEMMODE_FSTART_SHIFT; - - vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> - PXVFREQ_PX_SHIFT; - - ips->fmax = fmax; /* IPS callback will increase this */ - ips->fstart = fstart; - - ips->max_delay = fstart; - ips->min_delay = fmin; - ips->cur_delay = fstart; - - DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", - fmax, fmin, fstart); - I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); - /* - * Interrupts will be enabled in ironlake_irq_postinstall - */ - - I915_WRITE(VIDSTART, vstart); - POSTING_READ(VIDSTART); - - rgvmodectl |= MEMMODE_SWMODE_EN; - I915_WRITE(MEMMODECTL, rgvmodectl); - - if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) + I915_WRITE(MEMMODECTL, I915_READ(MEMMODECTL) | MEMMODE_SWMODE_EN); + if (!__ironlake_wait_for_rps(dev_priv)) DRM_ERROR("stuck trying to change perf mode\n"); mdelay(1); - ironlake_set_drps(dev_priv, fstart); + ips->last_count1 = I915_READ(DMIEC); + ips->last_count1 += I915_READ(DDREC); + ips->last_count1 += I915_READ(CSIEC); + ips->last_time1 = ktime_get_raw(); - ips->last_count1 = - I915_READ(DMIEC) + I915_READ(DDREC) + I915_READ(CSIEC); - ips->last_time1 = jiffies_to_msecs(jiffies); ips->last_count2 = I915_READ(GFXEC); ips->last_time2 = ktime_get_raw_ns(); - spin_unlock_irq(&mchdev_lock); + spin_unlock(&mchdev_lock); +} + +static void ironlake_init_drps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 rgvmodectl; + u8 fmax, fmin, fstart; + + spin_lock(&mchdev_lock); + rgvmodectl = I915_READ(MEMMODECTL); + spin_unlock(&mchdev_lock); + + /* Set up min, max, and cur for interrupt handling */ + fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; + fmin = (rgvmodectl & MEMMODE_FMIN_MASK); + fstart = (rgvmodectl & 
MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); + + rps->max_freq_hw = fmin; + rps->min_freq_hw = fmax; + rps->efficient_freq = fmin - fstart; + + I915_WRITE(VIDSTART, + (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT); } static void ironlake_disable_drps(struct drm_i915_private *dev_priv) { u16 rgvswctl; - spin_lock_irq(&mchdev_lock); + spin_lock(&mchdev_lock); rgvswctl = I915_READ16(MEMSWCTL); /* Ack interrupts, disable EFC interrupt */ I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); - I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); - I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); - I915_WRITE(DEIIR, DE_PCU_EVENT); - I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); + I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG); + I915_WRITE16(MEMINTRSTS, I915_READ16(MEMINTRSTS)); - /* Go back to the starting frequency */ - ironlake_set_drps(dev_priv, dev_priv->gt_pm.ips.fstart); - mdelay(1); rgvswctl |= MEMCTL_CMD_STS; - I915_WRITE(MEMSWCTL, rgvswctl); - mdelay(1); + I915_WRITE16(MEMSWCTL, rgvswctl); - spin_unlock_irq(&mchdev_lock); + spin_unlock(&mchdev_lock); } /* @@ -376,6 +386,8 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val) return valleyview_set_rps(dev_priv, val); else if (INTEL_GEN(dev_priv) >= 6) return gen6_set_rps(dev_priv, val); + else if (INTEL_GEN(dev_priv) >= 5) + return ironlake_set_rps(dev_priv, val); else return 0; } @@ -389,8 +401,12 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj) lockdep_assert_held(&rps->lock); GEM_BUG_ON(!rps->active); - min = rps->min_freq_user; - max = rps->max_freq_user; + min = clamp_t(int, + rps->min_freq_soft, + rps->min_freq_user, rps->max_freq_user); + max = clamp_t(int, + rps->max_freq_soft, + min, rps->max_freq_user); if (atomic_read(&rps->num_waiters) && max < rps->boost_freq) max = rps->boost_freq; @@ -480,7 +496,7 @@ static void gen6_disable_pm_irq(struct 
drm_i915_private *dev_priv, lockdep_assert_held(&dev_priv->irq_lock); dev_priv->gt_pm.ier &= ~disable_mask; - gen6_update_pm_irq(dev_priv, disable_mask, 0); + gen6_mask_pm_irq(dev_priv, disable_mask); I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->gt_pm.ier); /* though a barrier is missing here, but don't really need a one */ } @@ -503,7 +519,10 @@ static void enable_rps_interrupts(struct drm_i915_private *dev_priv) return; spin_lock_irq(&dev_priv->irq_lock); - gen6_enable_pm_irq(dev_priv, rps->pm_events); + if (INTEL_GEN(dev_priv) >= 6) + gen6_enable_pm_irq(dev_priv, rps->pm_events); + else if (IS_IRONLAKE_M(dev_priv)) + ilk_enable_display_irq(dev_priv, DE_PCU_EVENT); spin_unlock_irq(&dev_priv->irq_lock); } @@ -515,8 +534,13 @@ static void disable_rps_interrupts(struct drm_i915_private *dev_priv) return; spin_lock_irq(&dev_priv->irq_lock); - I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); - gen6_disable_pm_irq(dev_priv, rps->pm_events); + if (INTEL_GEN(dev_priv) >= 6) { + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); + gen6_disable_pm_irq(dev_priv, rps->pm_events); + } else if (IS_IRONLAKE_M(dev_priv)) { + ilk_disable_display_irq(dev_priv, DE_PCU_EVENT); + } spin_unlock_irq(&dev_priv->irq_lock); synchronize_irq(dev_priv->drm.irq); @@ -570,6 +594,37 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) return events; } +static u32 ilk_compute_pm_iir(struct drm_i915_private *dev_priv, u32 pm_iir) +{ + if ((pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) == 0) + return 0; + + spin_lock(&mchdev_lock); + I915_WRITE16(MEMINTRSTS, I915_READ16(MEMINTRSTS)); + I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG); + +#define busy_up I915_READ(RCPREVBSYTUPAVG) +#define busy_down I915_READ(RCPREVBSYTDNAVG) +#define max_avg I915_READ(RCBMAXAVG) +#define min_avg I915_READ(RCBMINAVG) + + if (busy_up > max_avg) + pm_iir = GEN6_PM_RP_UP_THRESHOLD; + else if (busy_down < min_avg) + pm_iir = GEN6_PM_RP_DOWN_THRESHOLD; + else + pm_iir = 0; + 
+#undef busy_up +#undef busy_down +#undef max_avg +#undef min_avg + + spin_unlock(&mchdev_lock); + + return pm_iir; +} + static void intel_rps_work(struct work_struct *work) { struct drm_i915_private *i915 = @@ -579,8 +634,9 @@ static void intel_rps_work(struct work_struct *work) int freq, adj; u32 pm_iir; - pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events; + pm_iir = xchg(&rps->pm_iir, 0); pm_iir |= vlv_wa_c0_ei(i915, pm_iir); + pm_iir |= ilk_compute_pm_iir(i915, pm_iir); client_boost = atomic_read(&rps->num_waiters); @@ -620,7 +676,7 @@ static void intel_rps_work(struct work_struct *work) if (adjust_rps(i915, freq, adj)) DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - if (pm_iir) { + if (pm_iir && INTEL_GEN(i915) >= 6) { spin_lock_irq(&i915->irq_lock); gen6_unmask_pm_irq(i915, rps->pm_events); spin_unlock_irq(&i915->irq_lock); @@ -663,10 +719,10 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv) */ adjust_rps(dev_priv, max(rps->freq, rps->efficient_freq), 0); - if (INTEL_GEN(dev_priv) >= 6) { - memset(&rps->ei, 0, sizeof(rps->ei)); - enable_rps_interrupts(dev_priv); - } + memset(&rps->ei, 0, sizeof(rps->ei)); + enable_rps_interrupts(dev_priv); + if (IS_GEN5(dev_priv)) + gen5_update_gfx_val(dev_priv); mutex_unlock(&rps->lock); } @@ -720,7 +776,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv) * state of the worker can be discarded. 
*/ cancel_work_sync(&rps->work); - gen6_reset_rps_interrupts(dev_priv); + if (INTEL_GEN(dev_priv) >= 6) + gen6_reset_rps_interrupts(dev_priv); } void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client) @@ -1531,6 +1588,110 @@ static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) dev_priv->gt_pm.rps.gpll_ref_freq); } +static unsigned long ilk_pxfreq(u32 vidfreq) +{ + int div = (vidfreq & 0x3f0000) >> 16; + int post = (vidfreq & 0x3000) >> 12; + int pre = (vidfreq & 0x7); + + if (!pre) + return 0; + + return (div * 133333) / (pre << post); +} + +static void ilk_init_emon(struct drm_i915_private *dev_priv) +{ + u32 lcfuse; + u8 pxw[16]; + int i; + + /* Disable to program */ + I915_WRITE(ECR, 0); + POSTING_READ(ECR); + + /* Program energy weights for various events */ + I915_WRITE(SDEW, 0x15040d00); + I915_WRITE(CSIEW0, 0x007f0000); + I915_WRITE(CSIEW1, 0x1e220004); + I915_WRITE(CSIEW2, 0x04000004); + + for (i = 0; i < 5; i++) + I915_WRITE(PEW(i), 0); + for (i = 0; i < 3; i++) + I915_WRITE(DEW(i), 0); + + /* Program P-state weights to account for frequency power adjustment */ + for (i = 0; i < 16; i++) { + u32 pxvidfreq = I915_READ(PXVFREQ(i)); + unsigned long freq = ilk_pxfreq(pxvidfreq); + unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> + PXVFREQ_PX_SHIFT; + unsigned long val; + + val = vid * vid; + val *= (freq / 1000); + val *= 255; + val /= (127*127*900); + if (val > 0xff) + DRM_ERROR("bad pxval: %ld\n", val); + pxw[i] = val; + } + /* Render standby states get 0 weight */ + pxw[14] = 0; + pxw[15] = 0; + + for (i = 0; i < 4; i++) { + u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | + (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); + I915_WRITE(PXW(i), val); + } + + /* Adjust magic regs to magic values (more experimental results) */ + I915_WRITE(OGW0, 0); + I915_WRITE(OGW1, 0); + I915_WRITE(EG0, 0x00007f00); + I915_WRITE(EG1, 0x0000000e); + I915_WRITE(EG2, 0x000e0000); + I915_WRITE(EG3, 0x68000300); + I915_WRITE(EG4, 0x42000000); 
+ I915_WRITE(EG5, 0x00140031); + I915_WRITE(EG6, 0); + I915_WRITE(EG7, 0); + + for (i = 0; i < 8; i++) + I915_WRITE(PXWL(i), 0); + + /* Enable PMON + select events */ + I915_WRITE(ECR, 0x80000019); + + lcfuse = I915_READ(LCFUSE02); + + dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK); +} + + +static void ilk_init_frequencies(struct drm_i915_private *i915) +{ + struct intel_ips *ips = &i915->gt_pm.ips; + + ips->r_t = i915->mem_freq; + + if (i915->fsb_freq <= 3200) + ips->c_m = 0; + else if (i915->fsb_freq <= 4800) + ips->c_m = 1; + else + ips->c_m = 2; +} + +static void gen5_init_gt_powersave(struct drm_i915_private *i915) +{ + ilk_init_frequencies(i915); + ilk_init_emon(i915); + ironlake_init_drps(i915); +} + static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -1830,18 +1991,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static unsigned int intel_pxfreq(u32 vidfreq) -{ - unsigned int div = (vidfreq & 0x3f0000) >> 16; - unsigned int post = (vidfreq & 0x3000) >> 12; - unsigned int pre = (vidfreq & 0x7); - - if (!pre) - return 0; - - return (div * 133333) / (pre << post); -} - static const struct cparams { u16 i; u16 t; @@ -1859,14 +2008,19 @@ static const struct cparams { static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) { struct intel_ips *ips = &dev_priv->gt_pm.ips; - u64 total_count, diff, ret; - u32 count1, count2, count3, m = 0, c = 0; - unsigned long now = jiffies_to_msecs(jiffies), diff1; + u64 total_count; + ktime_t dt, now; + u32 m = 0, c = 0; int i; lockdep_assert_held(&mchdev_lock); - diff1 = now - ips->last_time1; + /* FIXME: handle per-counter overflow */ + + total_count = I915_READ(DMIEC); + total_count += I915_READ(DDREC); + total_count += I915_READ(CSIEC); + now = ktime_get_raw(); /* * Prevent division-by-zero if we are asking too fast. 
@@ -1874,23 +2028,10 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) * faster than once in 10ms, so just return the saved value * in such cases. */ - if (diff1 <= 10) + dt = ktime_sub(now, ips->last_time1); + if (ktime_to_ms(dt) <= 10) return ips->chipset_power; - count1 = I915_READ(DMIEC); - count2 = I915_READ(DDREC); - count3 = I915_READ(CSIEC); - - total_count = count1 + count2 + count3; - - /* FIXME: handle per-counter overflow */ - if (total_count < ips->last_count1) { - diff = ~0UL - ips->last_count1; - diff += total_count; - } else { - diff = total_count - ips->last_count1; - } - for (i = 0; i < ARRAY_SIZE(cparams); i++) { if (cparams[i].i == ips->c_m && cparams[i].t == ips->r_t) { m = cparams[i].m; @@ -1899,16 +2040,13 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) } } - diff = div_u64(diff, diff1); - ret = ((m * diff) + c); - ret = div_u64(ret, 10); + ips->chipset_power = div_u64(m * (total_count - ips->last_count1) + c, + ktime_to_ms(dt) * 10); ips->last_count1 = total_count; ips->last_time1 = now; - ips->chipset_power = ret; - - return ret; + return ips->chipset_power; } unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) @@ -1919,11 +2057,11 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) return 0; intel_runtime_pm_get(dev_priv); - spin_lock_irq(&mchdev_lock); + spin_lock(&mchdev_lock); val = __i915_chipset_val(dev_priv); - spin_unlock_irq(&mchdev_lock); + spin_unlock(&mchdev_lock); intel_runtime_pm_put(dev_priv); return val; @@ -1941,7 +2079,7 @@ unsigned long i915_mch_val(struct drm_i915_private *dev_priv) b = tsfs & TSFS_INTR_MASK; - return ((m * x) / 127) - b; + return m * x / 127 - b; } static int _pxvid_to_vd(u8 pxvid) @@ -1969,49 +2107,31 @@ static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) { struct intel_ips *ips = &dev_priv->gt_pm.ips; - u64 now, diff, diffms; + 
ktime_t now, dt; u32 count; lockdep_assert_held(&mchdev_lock); - now = ktime_get_raw_ns(); - diffms = now - ips->last_time2; - do_div(diffms, NSEC_PER_MSEC); + count = I915_READ(GFXEC); - /* Don't divide by 0 */ - if (!diffms) + now = ktime_get_raw(); + dt = ktime_sub(now, ips->last_time2); + if (ktime_to_ms(dt) <= 10) return; - count = I915_READ(GFXEC); - - if (count < ips->last_count2) { - diff = ~0UL - ips->last_count2; - diff += count; - } else { - diff = count - ips->last_count2; - } + /* More magic constants... */ + ips->gfx_power = div_u64(1181ull * (count - ips->last_count2), + ktime_to_ms(dt) * 10); ips->last_count2 = count; ips->last_time2 = now; - - /* More magic constants... */ - diff = diff * 1181; - diff = div_u64(diff, diffms * 10); - ips->gfx_power = diff; } -void i915_update_gfx_val(struct drm_i915_private *dev_priv) +static void gen5_update_gfx_val(struct drm_i915_private *dev_priv) { - if (INTEL_GEN(dev_priv) != 5) - return; - - intel_runtime_pm_get(dev_priv); - spin_lock_irq(&mchdev_lock); - + spin_lock(&mchdev_lock); __i915_update_gfx_val(dev_priv); - - spin_unlock_irq(&mchdev_lock); - intel_runtime_pm_put(dev_priv); + spin_unlock(&mchdev_lock); } static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) @@ -2042,7 +2162,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) corr = corr * ((150142 * state1) / 10000 - 78642); corr /= 100000; - corr2 = (corr * ips->corr); + corr2 = corr * ips->corr; state2 = (corr2 * state1) / 10000; state2 /= 100; /* convert to mW */ @@ -2060,11 +2180,11 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) return 0; intel_runtime_pm_get(dev_priv); - spin_lock_irq(&mchdev_lock); + spin_lock(&mchdev_lock); val = __i915_gfx_val(dev_priv); - spin_unlock_irq(&mchdev_lock); + spin_unlock(&mchdev_lock); intel_runtime_pm_put(dev_priv); return val; @@ -2102,8 +2222,10 @@ unsigned long i915_read_mch_val(void) intel_runtime_pm_get(i915); spin_lock_irq(&mchdev_lock); + 
chipset_val = __i915_chipset_val(i915); graphics_val = __i915_gfx_val(i915); + spin_unlock_irq(&mchdev_lock); intel_runtime_pm_put(i915); @@ -2112,30 +2234,36 @@ unsigned long i915_read_mch_val(void) } EXPORT_SYMBOL_GPL(i915_read_mch_val); -/** - * i915_gpu_raise - raise GPU frequency limit - * - * Raise the limit; IPS indicates we have thermal headroom. - */ -bool i915_gpu_raise(void) +static bool ips_adjust(int dir) { struct drm_i915_private *i915; - struct intel_ips *ips; + struct intel_rps *rps; + u8 old, new; i915 = mchdev_get(); if (!i915) return false; - ips = &i915->gt_pm.ips; + rps = &i915->gt_pm.rps; - spin_lock_irq(&mchdev_lock); - if (ips->max_delay > ips->fmax) - ips->max_delay--; - spin_unlock_irq(&mchdev_lock); + old = READ_ONCE(rps->max_freq_soft); + new = clamp_t(int, old + dir, rps->min_freq_hw, rps->max_freq_hw); + if (cmpxchg(&rps->max_freq_soft, old, new) == old) + schedule_work(&rps->work); drm_dev_put(&i915->drm); return true; } + +/** + * i915_gpu_raise - raise GPU frequency limit + * + * Raise the limit; IPS indicates we have thermal headroom. 
+ */ +bool i915_gpu_raise(void) +{ + return ips_adjust(+1); +} EXPORT_SYMBOL_GPL(i915_gpu_raise); /** @@ -2146,22 +2274,7 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise); */ bool i915_gpu_lower(void) { - struct drm_i915_private *i915; - struct intel_ips *ips; - - i915 = mchdev_get(); - if (!i915) - return false; - - ips = &i915->gt_pm.ips; - - spin_lock_irq(&mchdev_lock); - if (ips->max_delay < ips->min_delay) - ips->max_delay++; - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return true; + return ips_adjust(-1); } EXPORT_SYMBOL_GPL(i915_gpu_lower); @@ -2172,16 +2285,13 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower); */ bool i915_gpu_busy(void) { - struct drm_i915_private *i915; - bool ret; - - i915 = mchdev_get(); - if (!i915) - return false; + bool ret = false; - ret = i915->gt.awake; + rcu_read_lock(); + if (i915_mch_dev) + ret = READ_ONCE(i915_mch_dev)->gt.awake; + rcu_read_unlock(); - drm_dev_put(&i915->drm); return ret; } EXPORT_SYMBOL_GPL(i915_gpu_busy); @@ -2195,22 +2305,33 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy); bool i915_gpu_turbo_disable(void) { struct drm_i915_private *i915; - bool ret; i915 = mchdev_get(); if (!i915) return false; - spin_lock_irq(&mchdev_lock); - i915->gt_pm.ips.max_delay = i915->gt_pm.ips.fstart; - ret = ironlake_set_drps(i915, i915->gt_pm.ips.fstart); - spin_unlock_irq(&mchdev_lock); + intel_gt_pm_disable_rps(i915); drm_dev_put(&i915->drm); - return ret; + return true; } EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); +bool i915_gpu_turbo_enable(void) +{ + struct drm_i915_private *i915; + + i915 = mchdev_get(); + if (!i915) + return false; + + intel_gt_pm_enable_rps(i915); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_turbo_enable); + /** * Tells the intel_ips driver that the i915 driver is now loaded, if * IPS got loaded first. 
@@ -2247,85 +2368,15 @@ void intel_gpu_ips_teardown(void) smp_store_mb(i915_mch_dev, NULL); } -static void intel_init_emon(struct drm_i915_private *dev_priv) -{ - u32 lcfuse; - u8 pxw[16]; - int i; - - /* Disable to program */ - I915_WRITE(ECR, 0); - POSTING_READ(ECR); - - /* Program energy weights for various events */ - I915_WRITE(SDEW, 0x15040d00); - I915_WRITE(CSIEW0, 0x007f0000); - I915_WRITE(CSIEW1, 0x1e220004); - I915_WRITE(CSIEW2, 0x04000004); - - for (i = 0; i < 5; i++) - I915_WRITE(PEW(i), 0); - for (i = 0; i < 3; i++) - I915_WRITE(DEW(i), 0); - - /* Program P-state weights to account for frequency power adjustment */ - for (i = 0; i < 16; i++) { - u32 pxvidfreq = I915_READ(PXVFREQ(i)); - unsigned long freq = intel_pxfreq(pxvidfreq); - unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> - PXVFREQ_PX_SHIFT; - unsigned long val; - - val = vid * vid; - val *= freq / 1000; - val *= 255; - val /= 127*127*900; - if (val > 0xff) - DRM_ERROR("bad pxval: %ld\n", val); - pxw[i] = val; - } - /* Render standby states get 0 weight */ - pxw[14] = 0; - pxw[15] = 0; - - for (i = 0; i < 4; i++) { - u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | - (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); - I915_WRITE(PXW(i), val); - } - - /* Adjust magic regs to magic values (more experimental results) */ - I915_WRITE(OGW0, 0); - I915_WRITE(OGW1, 0); - I915_WRITE(EG0, 0x00007f00); - I915_WRITE(EG1, 0x0000000e); - I915_WRITE(EG2, 0x000e0000); - I915_WRITE(EG3, 0x68000300); - I915_WRITE(EG4, 0x42000000); - I915_WRITE(EG5, 0x00140031); - I915_WRITE(EG6, 0); - I915_WRITE(EG7, 0); - - for (i = 0; i < 8; i++) - I915_WRITE(PXWL(i), 0); - - /* Enable PMON + select events */ - I915_WRITE(ECR, 0x80000019); - - lcfuse = I915_READ(LCFUSE02); - - dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK); -} - void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv) { intel_gt_pm_disable_rps(dev_priv); intel_gt_pm_disable_rc6(dev_priv); - if (INTEL_GEN(dev_priv) < 11) - 
gen6_reset_rps_interrupts(dev_priv); - else + if (INTEL_GEN(dev_priv) >= 11) WARN_ON_ONCE(1); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_reset_rps_interrupts(dev_priv); } void intel_gt_pm_init(struct drm_i915_private *dev_priv) @@ -2377,6 +2428,8 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv) valleyview_init_gt_powersave(dev_priv); else if (INTEL_GEN(dev_priv) >= 6) gen6_init_rps_frequencies(dev_priv); + else if (INTEL_GEN(dev_priv) >= 5) + gen5_init_gt_powersave(dev_priv); /* Derive initial user preferences/limits from the hardware limits */ rps->idle_freq = rps->min_freq_hw; @@ -2404,6 +2457,9 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv) } } + rps->max_freq_soft = rps->max_freq_hw; + rps->min_freq_soft = rps->min_freq_hw; + /* Finally allow us to boost to max by default */ rps->boost_freq = rps->max_freq_hw; @@ -2453,7 +2509,6 @@ static void __enable_rps(struct drm_i915_private *dev_priv) gen6_enable_rps(dev_priv); } else if (INTEL_GEN(dev_priv) >= 5) { ironlake_enable_drps(dev_priv); - intel_init_emon(dev_priv); } WARN_ON(rps->max_freq_hw < rps->min_freq_hw); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1ad86ee668d8..027c87489397 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -186,8 +186,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) break; } - dev_priv->gt_pm.ips.r_t = dev_priv->mem_freq; - switch (csipll & 0x3ff) { case 0x00c: dev_priv->fsb_freq = 3200; @@ -216,14 +214,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->fsb_freq = 0; break; } - - if (dev_priv->fsb_freq == 3200) { - dev_priv->gt_pm.ips.c_m = 0; - } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) { - dev_priv->gt_pm.ips.c_m = 1; - } else { - dev_priv->gt_pm.ips.c_m = 2; - } } static const struct cxsr_latency cxsr_latency_table[] = { diff --git a/drivers/platform/x86/intel_ips.c 
b/drivers/platform/x86/intel_ips.c index a0c95853fd3f..da7443baff55 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -336,6 +336,7 @@ struct ips_driver { bool (*gpu_lower)(void); bool (*gpu_busy)(void); bool (*gpu_turbo_disable)(void); + bool (*gpu_turbo_enable)(void); /* For restoration at unload */ u64 orig_turbo_limit; @@ -575,7 +576,11 @@ static void ips_enable_gpu_turbo(struct ips_driver *ips) { if (ips->__gpu_turbo_on) return; - ips->__gpu_turbo_on = true; + + if (!ips->gpu_turbo_enable()) + dev_err(ips->dev, "failed to enable graphics turbo\n"); + else + ips->__gpu_turbo_on = true; } /** @@ -1432,9 +1437,14 @@ static bool ips_get_i915_syms(struct ips_driver *ips) ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable); if (!ips->gpu_turbo_disable) goto out_put_busy; + ips->gpu_turbo_enable = symbol_get(i915_gpu_turbo_enable); + if (!ips->gpu_turbo_enable) + goto out_put_disable; return true; +out_put_disable: + symbol_put(i915_gpu_turbo_disable); out_put_busy: symbol_put(i915_gpu_busy); out_put_lower: @@ -1676,6 +1686,8 @@ static void ips_remove(struct pci_dev *dev) symbol_put(i915_gpu_busy); if (ips->gpu_turbo_disable) symbol_put(i915_gpu_turbo_disable); + if (ips->gpu_turbo_enable) + symbol_put(i915_gpu_turbo_enable); rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN); diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index c9e5a6621b95..6ee5d77cc923 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -35,6 +35,7 @@ extern bool i915_gpu_raise(void); extern bool i915_gpu_lower(void); extern bool i915_gpu_busy(void); extern bool i915_gpu_turbo_disable(void); +extern bool i915_gpu_turbo_enable(void); /* Exported from arch/x86/kernel/early-quirks.c */ extern struct resource intel_graphics_stolen_res; -- 2.16.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx 
https://lists.freedesktop.org/mailman/listinfo/intel-gfx