If Userspace isn't using MI_PREDICATE all slices must be enabled for backward compatibility. If I915_EXEC_USE_PREDICATE isn't set and default is set to half, kernel will force all slices on. v2: fix the inverted logic for backwards compatibility USE_PREDICATE unset force gt_full when default is half instead of GT_FULL flag. v3: Accepting Chris's suggestions: better variable names; better logic around state_default x legacy_userspace_busy; remove unnecessary mutex; v4: Accepting more suggestions from Chris: * Send all LRIs in only one block and don't ignore if it fails. * function name and cleaner code on forcing_full. v5: fix mutex_lock use by Chris. CC: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> CC: Eric Anholt <eric@xxxxxxxxxx> CC: Kenneth Graunke <kenneth@xxxxxxxxxxxxx> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 64 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 11 +++++ drivers/gpu/drm/i915/i915_sysfs.c | 7 ++++ drivers/gpu/drm/i915/intel_display.c | 17 ++++++++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 41 ++++++++++++++++++- include/uapi/drm/i915_drm.h | 8 +++- 8 files changed, 154 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 685fb1d..67bbbce 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1219,6 +1219,12 @@ struct i915_package_c8 { } regsave; }; +struct i915_gt_slices { + int state_default; + int legacy_userspace_busy; + struct mutex lock; /* locks access to this struct and slice registers */ +}; + typedef struct drm_i915_private { struct drm_device *dev; struct kmem_cache *slab; @@ -1418,6 +1424,8 @@ typedef struct drm_i915_private { struct i915_package_c8 pc8; + struct i915_gt_slices gt_slices; + /* Old dri1 support infrastructure, beware the dragons ya fools entering * here! 
*/ struct i915_dri1_state dri1; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0ce0d47..3ada5b4 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -922,6 +922,56 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +static int gt_legacy_userspace_busy(struct intel_ring_buffer *ring) +{ + int ret; + + ret = intel_ring_begin(ring, 18); + if (ret) + return ret; + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, HSW_GT_SLICE_INFO); + intel_ring_emit(ring, SLICE_SEL_BOTH); + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, MI_PREDICATE_RESULT_2); + intel_ring_emit(ring, LOWER_SLICE_ENABLED); + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, HSW_SLICESHUTDOWN); + intel_ring_emit(ring, ~SLICE_SHUTDOWN); + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); + intel_ring_emit(ring, CS_IDLE_COUNT_1US); + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, WAIT_FOR_RC6_EXIT); + intel_ring_emit(ring, _MASKED_BIT_ENABLE(WAIT_RC6_EXIT)); + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); + intel_ring_emit(ring, CS_IDLE_COUNT_5US); + + intel_ring_advance(ring); + return 0; +} + +static bool gt_legacy_userspace(struct intel_ring_buffer *ring, + struct drm_i915_gem_execbuffer2 *args) +{ + drm_i915_private_t *dev_priv = ring->dev->dev_private; + + if (ring->id == BCS) + return false; + + if (!HAS_SLICE_SHUTDOWN(ring->dev)) + return false; + + return (args->flags & I915_EXEC_USE_PREDICATE) == 0; +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -935,6 +985,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_clip_rect *cliprects = NULL; struct intel_ring_buffer *ring; struct i915_ctx_hang_stats *hs; + 
struct i915_gt_slices *gt_slices = &dev_priv->gt_slices; u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 exec_start, exec_len; u32 mask, flags; @@ -999,6 +1050,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (gt_legacy_userspace(ring, args)) { + mutex_lock(&gt_slices->lock); + if (gt_slices->state_default == 0 && + !gt_slices->legacy_userspace_busy) { + ret = gt_legacy_userspace_busy(ring); + if (ret == 0) + gt_slices->legacy_userspace_busy = true; + } + mutex_unlock(&gt_slices->lock); + if (ret) + return ret; + } + mode = args->flags & I915_EXEC_CONSTANTS_MASK; mask = I915_EXEC_CONSTANTS_MASK; switch (mode) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 497c441..0146bef 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -277,6 +277,17 @@ #define SLICE_STATUS_MAIN_ON (2<<0) #define SLICE_STATUS_BOTH_ON (3<<0) +#define HSW_SLICESHUTDOWN 0xA190 +#define SLICE_SHUTDOWN (1<<0) + +#define RC_IDLE_MAX_COUNT 0x2054 +#define CS_IDLE_COUNT_1US (1<<1) +#define CS_IDLE_COUNT_5US (1<<3) + +#define WAIT_FOR_RC6_EXIT 0x20CC +#define WAIT_RC6_EXIT (1<<0) +#define MASK_WAIT_RC6_EXIT (1<<16) + /* * 3D instructions used by the kernel */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 86ccd52..a821499 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -135,16 +135,23 @@ static ssize_t gt_slice_config_store(struct device *kdev, { struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); struct drm_device *dev = minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; int ret; if (!strncmp(buf, "full", sizeof("full") - 1)) { ret = intel_set_gt_full(dev); if (ret) return ret; + mutex_lock(&dev_priv->gt_slices.lock); + dev_priv->gt_slices.state_default = 1; + mutex_unlock(&dev_priv->gt_slices.lock); } else if (!strncmp(buf, "half", sizeof("half") - 1)) { ret = 
intel_set_gt_half(dev); if (ret) return ret; + mutex_lock(&dev_priv->gt_slices.lock); + dev_priv->gt_slices.state_default = 0; + mutex_unlock(&dev_priv->gt_slices.lock); } else return -EINVAL; return count; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4f1b636..eec4c0e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7759,6 +7759,20 @@ void intel_mark_busy(struct drm_device *dev) i915_update_gfx_val(dev_priv); } +static bool intel_need_shutdown_slices(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + mutex_lock(&dev_priv->gt_slices.lock); + if (dev_priv->gt_slices.legacy_userspace_busy) { + dev_priv->gt_slices.legacy_userspace_busy = false; + mutex_unlock(&dev_priv->gt_slices.lock); + return true; + } + mutex_unlock(&dev_priv->gt_slices.lock); + return false; +} + void intel_mark_idle(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -7778,6 +7792,9 @@ void intel_mark_idle(struct drm_device *dev) if (dev_priv->info->gen >= 6) gen6_rps_idle(dev->dev_private); + + if (intel_need_shutdown_slices(dev)) + intel_set_gt_half_async(dev); } void intel_mark_fb_busy(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a9abbb5..98cd63e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -836,6 +836,7 @@ void intel_disable_gt_powersave(struct drm_device *dev); void ironlake_teardown_rc6(struct drm_device *dev); int intel_set_gt_full(struct drm_device *dev); int intel_set_gt_half(struct drm_device *dev); +void intel_set_gt_half_async(struct drm_device *dev); void intel_init_gt_slices(struct drm_device *dev); void gen6_update_ring_freq(struct drm_device *dev); void gen6_rps_idle(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 63af075..b3bd70f 
100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3873,6 +3873,7 @@ int intel_set_gt_full(struct drm_device *dev) if (!HAS_SLICE_SHUTDOWN(dev)) return -ENODEV; + mutex_lock(&dev_priv->gt_slices.lock); I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH); /* Slices are enabled on RC6 exit */ @@ -3881,13 +3882,18 @@ int intel_set_gt_full(struct drm_device *dev) if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) == SLICE_STATUS_BOTH_ON), 2000)) { DRM_ERROR("Timeout enabling full gt slices\n"); + I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + gen6_gt_force_wake_put(dev_priv); + mutex_unlock(&dev_priv->gt_slices.lock); return -ETIMEDOUT; } + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED); gen6_gt_force_wake_put(dev_priv); + mutex_unlock(&dev_priv->gt_slices.lock); return 0; } @@ -3899,6 +3905,7 @@ int intel_set_gt_half(struct drm_device *dev) if (!HAS_SLICE_SHUTDOWN(dev)) return -ENODEV; + mutex_lock(&dev_priv->gt_slices.lock); I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); /* Slices are disabled on RC6 exit */ @@ -3907,16 +3914,42 @@ int intel_set_gt_half(struct drm_device *dev) if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) == SLICE_STATUS_MAIN_ON), 2000)) { DRM_ERROR("Timed out disabling half gt slices\n"); + I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH); I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED); + gen6_gt_force_wake_put(dev_priv); + mutex_unlock(&dev_priv->gt_slices.lock); return -ETIMEDOUT; } + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); gen6_gt_force_wake_put(dev_priv); + + mutex_unlock(&dev_priv->gt_slices.lock); return 0; } +/** + * On Haswell, slices on/off transitions are done via RC6 sequence. + * This async function allows you to request slices shutdown without waiting. + * Slices will be disabled on next RC6 exit. 
+ */ +void intel_set_gt_half_async(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!HAS_SLICE_SHUTDOWN(dev)) + return; + + mutex_lock(&dev_priv->gt_slices.lock); + if (dev_priv->gt_slices.state_default == 0) { + I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + } + mutex_unlock(&dev_priv->gt_slices.lock); +} + void intel_init_gt_slices(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -3927,9 +3960,13 @@ void intel_init_gt_slices(struct drm_device *dev) if (!HAS_SLICE_SHUTDOWN(dev)) return; + dev_priv->gt_slices.state_default = 1; + dev_priv->gt_slices.legacy_userspace_busy = false; + mutex_init(&dev_priv->gt_slices.lock); + if (!i915_gt_slice_config) { - I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); - I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + dev_priv->gt_slices.state_default = 0; + intel_set_gt_half_async(dev); } } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3a4e97b..3fa3e24 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -731,7 +731,13 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_HANDLE_LUT (1<<12) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) +/* If this flag is set userspace is using predicate and half slices can be + * left disabled for power saving. Otherwise use all slices even when disabled + * by boot parameter or via sysfs interface + */ +#define I915_EXEC_USE_PREDICATE (1<<13) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_USE_PREDICATE<<1) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.7.11.7 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx