Hi Chris, Thank you very much for all suggestions. Will do the changes. But for that question below I don't have the clear answer: On Wed, Oct 16, 2013 at 6:15 AM, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote: > On Tue, Oct 15, 2013 at 05:50:49PM -0300, Rodrigo Vivi wrote: >> If Userspace isn't using MI_PREDICATE all slices must be enabled for >> backward compatibility. >> >> If I915_EXEC_USE_PREDICATE isn't set and defaul is set to half, kernel will force >> all slices on. >> >> v2: fix the inverted logic for backwards compatibility >> USE_PREDICATE unset force gt_full when defaul is half >> instead of GT_FULL flag. >> >> CC: Eric Anholt <eric@xxxxxxxxxx> >> CC: Kenneth Graunke <kenneth@xxxxxxxxxxxxx> >> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 8 ++++ >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++ >> drivers/gpu/drm/i915/i915_reg.h | 11 +++++ >> drivers/gpu/drm/i915/i915_sysfs.c | 11 ++++- >> drivers/gpu/drm/i915/intel_display.c | 6 +++ >> drivers/gpu/drm/i915/intel_drv.h | 1 + >> drivers/gpu/drm/i915/intel_pm.c | 39 ++++++++++++++++- >> include/uapi/drm/i915_drm.h | 8 +++- >> 8 files changed, 146 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 685fb1d..bd4774e 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1219,6 +1219,12 @@ struct i915_package_c8 { >> } regsave; >> }; >> >> +struct i915_gt_slices { >> + int state_default; > > Either use it as an integer (a count of slices enabled by default) or > make it an enum. > >> + int forcing_full; > > bool legacy_userspace_busy; ? > >> + struct mutex m_lock; > > Ugh. Just struct mutex lock; /* locks all access to this struct and > slice registers */ will suffice > >> +}; >> + >> typedef struct drm_i915_private { >> struct drm_device *dev; >> struct kmem_cache *slab; >> @@ -1418,6 +1424,8 @@ typedef struct drm_i915_private { >> >> struct i915_package_c8 pc8; >> >> + struct i915_gt_slices gt_slices; >> + >> /* Old dri1 support infrastructure, beware the dragons ya fools entering >> * here! */ >> struct i915_dri1_state dri1; >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index 0ce0d47..eb09a51 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -923,6 +923,66 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, >> } >> >> static int >> +i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring) > > That's a misnomer. > >> +{ >> + drm_i915_private_t *dev_priv = dev->dev_private; >> + int ret; >> + >> + if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS]) >> + return 0; >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, HSW_GT_SLICE_INFO); >> + intel_ring_emit(ring, SLICE_SEL_BOTH); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, MI_PREDICATE_RESULT_2); >> + intel_ring_emit(ring, LOWER_SLICE_ENABLED); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, HSW_SLICESHUTDOWN); >> + intel_ring_emit(ring, ~SLICE_SHUTDOWN); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); >> + intel_ring_emit(ring, CS_IDLE_COUNT_1US); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, WAIT_FOR_RC6_EXIT); >> + intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); >> + intel_ring_emit(ring, CS_IDLE_COUNT_5US); >> + intel_ring_advance(ring); >> + >> + return 0; >> +} >> + >> +static int >> i915_gem_do_execbuffer(struct drm_device *dev, void *data, >> struct drm_file *file, >> struct drm_i915_gem_execbuffer2 *args, >> @@ -999,6 +1059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, >> return -EINVAL; >> } >> >> + if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 && >> + dev_priv->gt_slices.state_default == 0 && >> + !dev_priv->gt_slices.forcing_full) { > > Always set legacy_userspace_busy here so that a change in state_default > cannot possibly break currently active users i.e. they are independent > bookkeeping. > >> + dev_priv->gt_slices.forcing_full = true; >> + i915_gt_full_immediate(dev, ring); > > What happens when this fails? If it only partially emits the commands, > etc. This is a very good question. If it fails, my only concern would be the sync between HSW_GT_SLICE_INFO and MI_PREDICATE_RESULT_2. But I don't know how to do this during the exec buf without delaying the execution. Do you have any suggestion? > >> + } >> + >> mode = args->flags & I915_EXEC_CONSTANTS_MASK; >> mask = I915_EXEC_CONSTANTS_MASK; >> switch (mode) { >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index 497c441..0146bef 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -277,6 +277,17 @@ >> #define SLICE_STATUS_MAIN_ON (2<<0) >> #define SLICE_STATUS_BOTH_ON (3<<0) >> >> +#define HSW_SLICESHUTDOWN 0xA190 >> +#define SLICE_SHUTDOWN (1<<0) >> + >> +#define RC_IDLE_MAX_COUNT 0x2054 >> +#define CS_IDLE_COUNT_1US (1<<1) >> +#define CS_IDLE_COUNT_5US (1<<3) >> + >> +#define WAIT_FOR_RC6_EXIT 0x20CC >> +#define WAIT_RC6_EXIT (1<<0) >> +#define MASK_WAIT_RC6_EXIT (1<<16) >> + >> /* >> * 3D instructions used by the kernel >> */ >> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c >> index 86ccd52..25b0c5b 100644 >> --- a/drivers/gpu/drm/i915/i915_sysfs.c >> +++ b/drivers/gpu/drm/i915/i915_sysfs.c >> @@ -124,9 +124,13 @@ static ssize_t gt_slice_config_show(struct device *kdev, >> struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); >> struct drm_device *dev = minor->dev; >> struct drm_i915_private *dev_priv = dev->dev_private; >> + bool full; >> >> - return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == >> - LOWER_SLICE_ENABLED ? "full" : "half"); >> + mutex_lock(&dev_priv->gt_slices.m_lock); >> + full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED; >> + mutex_unlock(&dev_priv->gt_slices.m_lock); > > This locking is not stopping any races - it is superfluous. > >> + return sprintf(buf, "%s\n", full ? "full" : "half"); >> } >> >> static ssize_t gt_slice_config_store(struct device *kdev, >> @@ -135,16 +139,19 @@ static ssize_t gt_slice_config_store(struct device *kdev, >> { >> struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); >> struct drm_device *dev = minor->dev; >> + struct drm_i915_private *dev_priv = dev->dev_private; >> int ret; >> >> if (!strncmp(buf, "full", sizeof("full") - 1)) { >> ret = intel_set_gt_full(dev); >> if (ret) >> return ret; >> + dev_priv->gt_slices.state_default = 1; >> } else if (!strncmp(buf, "half", sizeof("half") - 1)) { >> ret = intel_set_gt_half(dev); >> if (ret) >> return ret; >> + dev_priv->gt_slices.state_default = 0; >> } else >> return -EINVAL; >> return count; >> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c >> index 4f1b636..1a2f8bc 100644 >> --- a/drivers/gpu/drm/i915/intel_display.c >> +++ b/drivers/gpu/drm/i915/intel_display.c >> @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev) >> >> if (dev_priv->info->gen >= 6) >> gen6_rps_idle(dev->dev_private); >> + >> + if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full && >> + dev_priv->gt_slices.state_default == 0) { >> + intel_set_gt_half_async(dev); >> + dev_priv->gt_slices.forcing_full = false; > > Again, you want to be ignoring state_default when changing > legacy_userspace_busy. > > The lower level slice config handling code should be making the judgment > based on all parameters after one changes. > -Chris > > -- > Chris Wilson, Intel Open Source Technology Centre -- Rodrigo Vivi Blog: http://blog.vivi.eng.br _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx