"Mcaulay, Alistair" <alistair.mcaulay@xxxxxxxxx> writes: > Hi Mika, > > can you please review this patch, and verify it fixes the issues in your previous review. > > Thanks, > Alistair. > >> -----Original Message----- >> From: Mcaulay, Alistair >> Sent: Friday, August 15, 2014 6:52 PM >> To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx >> Cc: Mcaulay, Alistair >> Subject: [PATCH v3] drm/i915: Rework GPU reset sequence to match driver >> load & thaw >> >> From: "McAulay, Alistair" <alistair.mcaulay@xxxxxxxxx> >> >> This patch is to address Daniels concerns over different code during reset: >> >> http://lists.freedesktop.org/archives/intel-gfx/2014-June/047758.html >> >> "The reason for aiming as hard as possible to use the exact same code for >> driver load, gpu reset and runtime pm/system resume is that we've simply >> seen too many bugs due to slight variations and unintended omissions." >> >> Tested using igt drv_hangman. >> >> V2: Cleaner way of preventing check_wedge returning -EAGAIN >> V3: Clean the last_context during reset, to ensure do_switch() does the >> MI_SET_CONTEXT. As per review. >> Signed-off-by: McAulay, Alistair <alistair.mcaulay@xxxxxxxxx> >> --- Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> >> drivers/gpu/drm/i915/i915_drv.c | 6 +++ >> drivers/gpu/drm/i915/i915_drv.h | 3 ++ >> drivers/gpu/drm/i915/i915_gem.c | 4 +- >> drivers/gpu/drm/i915/i915_gem_context.c | 33 +++------------- >> drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +++++---------------------------- >> drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- >> 6 files changed, 28 insertions(+), 88 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.c >> b/drivers/gpu/drm/i915/i915_drv.c index 5e4fefd..3bfafe6 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.c >> +++ b/drivers/gpu/drm/i915/i915_drv.c >> @@ -806,7 +806,13 @@ int i915_reset(struct drm_device *dev) >> !dev_priv->ums.mm_suspended) { >> dev_priv->ums.mm_suspended = 0; >> >> + /* Used to prevent gem_check_wedged returning -EAGAIN >> during gpu reset */ >> + dev_priv->gpu_error.reload_in_reset = true; >> + >> ret = i915_gem_init_hw(dev); >> + >> + dev_priv->gpu_error.reload_in_reset = false; >> + >> mutex_unlock(&dev->struct_mutex); >> if (ret) { >> DRM_ERROR("Failed hw init on reset %d\n", ret); diff >> --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 991b663..116daff 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1217,6 +1217,9 @@ struct i915_gpu_error { >> >> /* For missed irq/seqno simulation. */ >> unsigned int test_irq_rings; >> + >> + /* Used to prevent gem_check_wedged returning -EAGAIN during >> gpu reset */ >> + bool reload_in_reset; >> }; >> >> enum modeset_restore { >> diff --git a/drivers/gpu/drm/i915/i915_gem.c >> b/drivers/gpu/drm/i915/i915_gem.c index ef047bc..e7396eb 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -1085,7 +1085,9 @@ i915_gem_check_wedge(struct i915_gpu_error >> *error, >> if (i915_terminally_wedged(error)) >> return -EIO; >> >> - return -EAGAIN; >> + /* Check if GPU Reset is in progress */ >> + if (!error->reload_in_reset) >> + return -EAGAIN; >> } >> >> return 0; >> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c >> b/drivers/gpu/drm/i915/i915_gem_context.c >> index de72a28..9378ad8 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_context.c >> +++ b/drivers/gpu/drm/i915/i915_gem_context.c >> @@ -377,34 +377,17 @@ void i915_gem_context_reset(struct drm_device >> *dev) >> struct drm_i915_private *dev_priv = dev->dev_private; >> int i; >> >> - /* Prevent the hardware from restoring the last context (which >> hung) on >> - * the next switch */ >> for (i = 0; i < I915_NUM_RINGS; i++) { >> struct intel_engine_cs *ring = &dev_priv->ring[i]; >> - struct intel_context *dctx = ring->default_context; >> struct intel_context *lctx = ring->last_context; >> >> - /* Do a fake switch to the default context */ >> - if (lctx == dctx) >> - continue; >> - >> - if (!lctx) >> - continue; >> + if (lctx) { >> + if (lctx->legacy_hw_ctx.rcs_state && i == RCS) >> + i915_gem_object_ggtt_unpin(lctx- >> >legacy_hw_ctx.rcs_state); >> >> - if (dctx->legacy_hw_ctx.rcs_state && i == RCS) { >> - WARN_ON(i915_gem_obj_ggtt_pin(dctx- >> >legacy_hw_ctx.rcs_state, >> - >> get_context_alignment(dev), 0)); >> - /* Fake a finish/inactive */ >> - dctx->legacy_hw_ctx.rcs_state->base.write_domain >> = 0; >> - dctx->legacy_hw_ctx.rcs_state->active = 0; >> + i915_gem_context_unreference(lctx); >> + ring->last_context = NULL; >> } >> - >> - if (lctx->legacy_hw_ctx.rcs_state && i == RCS) >> - i915_gem_object_ggtt_unpin(lctx- >> >legacy_hw_ctx.rcs_state); >> - >> - i915_gem_context_unreference(lctx); >> - i915_gem_context_reference(dctx); >> - ring->last_context = dctx; >> } >> } >> >> @@ -498,10 +481,6 @@ int i915_gem_context_enable(struct >> drm_i915_private *dev_priv) >> ppgtt->enable(ppgtt); >> } >> >> - /* FIXME: We should make this work, even in reset */ >> - if (i915_reset_in_progress(&dev_priv->gpu_error)) >> - return 0; >> - >> BUG_ON(!dev_priv->ring[RCS].default_context); >> >> for_each_ring(ring, dev_priv, i) { >> @@ -645,7 +624,7 @@ static int do_switch(struct intel_engine_cs *ring, >> from = ring->last_context; >> >> if (USES_FULL_PPGTT(ring->dev)) { >> - ret = ppgtt->switch_mm(ppgtt, ring, false); >> + ret = ppgtt->switch_mm(ppgtt, ring); >> if (ret) >> goto unpin_out; >> } >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c >> b/drivers/gpu/drm/i915/i915_gem_gtt.c >> index 5188936..450c8a9 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> @@ -216,19 +216,12 @@ static gen6_gtt_pte_t >> iris_pte_encode(dma_addr_t addr, >> >> /* Broadwell Page Directory Pointer Descriptors */ static int >> gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, >> - uint64_t val, bool synchronous) >> + uint64_t val) >> { >> - struct drm_i915_private *dev_priv = ring->dev->dev_private; >> int ret; >> >> BUG_ON(entry >= 4); >> >> - if (synchronous) { >> - I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32); >> - I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val); >> - return 0; >> - } >> - >> ret = intel_ring_begin(ring, 6); >> if (ret) >> return ret; >> @@ -245,8 +238,7 @@ static int gen8_write_pdp(struct intel_engine_cs >> *ring, unsigned entry, } >> >> static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, >> - struct intel_engine_cs *ring, >> - bool synchronous) >> + struct intel_engine_cs *ring) >> { >> int i, ret; >> >> @@ -255,7 +247,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt >> *ppgtt, >> >> for (i = used_pd - 1; i >= 0; i--) { >> dma_addr_t addr = ppgtt->pd_dma_addr[i]; >> - ret = gen8_write_pdp(ring, i, addr, synchronous); >> + ret = gen8_write_pdp(ring, i, addr); >> if (ret) >> return ret; >> } >> @@ -724,29 +716,10 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt >> *ppgtt) } >> >> static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, >> - struct intel_engine_cs *ring, >> - bool synchronous) >> + struct intel_engine_cs *ring) >> { >> - struct drm_device *dev = ppgtt->base.dev; >> - struct drm_i915_private *dev_priv = dev->dev_private; >> int ret; >> >> - /* If we're in reset, we can assume the GPU is sufficiently idle to >> - * manually frob these bits. Ideally we could use the ring functions, >> - * except our error handling makes it quite difficult (can't use >> - * intel_ring_begin, ring->flush, or intel_ring_advance) >> - * >> - * FIXME: We should try not to special case reset >> - */ >> - if (synchronous || >> - i915_reset_in_progress(&dev_priv->gpu_error)) { >> - WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); >> - I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); >> - I915_WRITE(RING_PP_DIR_BASE(ring), >> get_pd_offset(ppgtt)); >> - POSTING_READ(RING_PP_DIR_BASE(ring)); >> - return 0; >> - } >> - >> /* NB: TLBs must be flushed and invalidated before a switch */ >> ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, >> I915_GEM_GPU_DOMAINS); >> if (ret) >> @@ -768,29 +741,10 @@ static int hsw_mm_switch(struct i915_hw_ppgtt >> *ppgtt, } >> >> static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, >> - struct intel_engine_cs *ring, >> - bool synchronous) >> + struct intel_engine_cs *ring) >> { >> - struct drm_device *dev = ppgtt->base.dev; >> - struct drm_i915_private *dev_priv = dev->dev_private; >> int ret; >> >> - /* If we're in reset, we can assume the GPU is sufficiently idle to >> - * manually frob these bits. Ideally we could use the ring functions, >> - * except our error handling makes it quite difficult (can't use >> - * intel_ring_begin, ring->flush, or intel_ring_advance) >> - * >> - * FIXME: We should try not to special case reset >> - */ >> - if (synchronous || >> - i915_reset_in_progress(&dev_priv->gpu_error)) { >> - WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); >> - I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); >> - I915_WRITE(RING_PP_DIR_BASE(ring), >> get_pd_offset(ppgtt)); >> - POSTING_READ(RING_PP_DIR_BASE(ring)); >> - return 0; >> - } >> - >> /* NB: TLBs must be flushed and invalidated before a switch */ >> ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, >> I915_GEM_GPU_DOMAINS); >> if (ret) >> @@ -819,14 +773,11 @@ static int gen7_mm_switch(struct i915_hw_ppgtt >> *ppgtt, } >> >> static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, >> - struct intel_engine_cs *ring, >> - bool synchronous) >> + struct intel_engine_cs *ring) >> { >> struct drm_device *dev = ppgtt->base.dev; >> struct drm_i915_private *dev_priv = dev->dev_private; >> >> - if (!synchronous) >> - return 0; >> >> I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); >> I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); @@ - >> 852,7 +803,7 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) >> if (USES_FULL_PPGTT(dev)) >> continue; >> >> - ret = ppgtt->switch_mm(ppgtt, ring, true); >> + ret = ppgtt->switch_mm(ppgtt, ring); >> if (ret) >> goto err_out; >> } >> @@ -897,7 +848,7 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt >> *ppgtt) >> if (USES_FULL_PPGTT(dev)) >> continue; >> >> - ret = ppgtt->switch_mm(ppgtt, ring, true); >> + ret = ppgtt->switch_mm(ppgtt, ring); >> if (ret) >> return ret; >> } >> @@ -926,7 +877,7 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt >> *ppgtt) >> I915_WRITE(GFX_MODE, >> _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); >> >> for_each_ring(ring, dev_priv, i) { >> - int ret = ppgtt->switch_mm(ppgtt, ring, true); >> + int ret = ppgtt->switch_mm(ppgtt, ring); >> if (ret) >> return ret; >> } >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h >> b/drivers/gpu/drm/i915/i915_gem_gtt.h >> index 8d6f7c1..bf1e4fc 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h >> @@ -262,8 +262,7 @@ struct i915_hw_ppgtt { >> >> int (*enable)(struct i915_hw_ppgtt *ppgtt); >> int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, >> - struct intel_engine_cs *ring, >> - bool synchronous); >> + struct intel_engine_cs *ring); >> void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file >> *m); }; >> >> -- >> 2.0.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx