As we continue to struggle to unravel the mysteries of sending a seqno along with notification interrupt from the GPU to the CPU, fallback to polling the seqno. We know that the seqno write eventually becomes visible to the CPU as we find the seqno value has been updated during hangcheck, so this method simply runs a hangcheck much faster whilst we are waiting for a request to finish. Optimistically such waits (of the CPU waiting for the GPU to complete) are rare and complete quickly, and we can migrate most to using semaphores. Or in other words: yes, this is a large amount of duct-tape. For testing, set i915.irq_notify=3 References: https://bugs.freedesktop.org/show_bug.cgi?id=38862 Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> --- This is more or less debugging code that provides a workaround for the problems we have so far encountered with the IRQ/seqno race. Even after we find the correct solution, having an option to fallback to polling may help debug other problems (for example it would have been useful during the gen3 lost irqs) and so is worth supporting. The only question is whether we actually enable polling by default. Eugeni's initial powermeter readings suggested that it had no noticeable impact, but we need confirmation to be sure. And the other question is whether the 1ms poll interval will lead to micro-stuttering and less-than-ideal performance. -Chris --- drivers/gpu/drm/i915/i915_drv.c | 8 ++ drivers/gpu/drm/i915/i915_drv.h | 4 + drivers/gpu/drm/i915/i915_gem.c | 8 +- drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 190 +++++++++++++++++-------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +- 6 files changed, 130 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 15bfa91..36e2938 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -103,6 +103,14 @@ MODULE_PARM_DESC(enable_hangcheck, "WARNING: Disabling this can cause system wide hangs. " "(default: true)"); +unsigned int i915_irq_notify __read_mostly = I915_IRQ_NOTIFY_INTERRUPT; +module_param_named(irq_notify, i915_irq_notify, int, 0600); +MODULE_PARM_DESC(irq_notify, + "Choose the request notification method, can be any " + "combination of irq (0x1) or polling (0x2). " + "WARNING: Selecting no method will result in busy-waits. " + "(default: is to use irq only)"); + static struct drm_driver driver; extern int intel_agp_enabled; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4a9c1b9..49fa8e9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1006,6 +1006,10 @@ extern unsigned int i915_enable_rc6 __read_mostly; extern int i915_enable_fbc __read_mostly; extern bool i915_enable_hangcheck __read_mostly; +extern unsigned int i915_irq_notify __read_mostly; +#define I915_IRQ_NOTIFY_INTERRUPT 0x1 +#define I915_IRQ_NOTIFY_POLL 0x2 + extern int i915_suspend(struct drm_device *dev, pm_message_t state); extern int i915_resume(struct drm_device *dev); extern int i915_master_create(struct drm_device *dev, struct drm_master *master); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 60ff1b6..8eadcc5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1995,7 +1995,7 @@ i915_wait_request(struct intel_ring_buffer *ring, trace_i915_gem_request_wait_begin(ring, seqno); ring->waiting_seqno = seqno; - if (ring->irq_get(ring)) { + if (intel_ring_get_irq(ring)) { if (dev_priv->mm.interruptible) ret = wait_event_interruptible(ring->irq_queue, i915_seqno_passed(ring->get_seqno(ring), seqno) @@ -2005,7 +2005,7 @@ i915_wait_request(struct intel_ring_buffer *ring, i915_seqno_passed(ring->get_seqno(ring), seqno) || atomic_read(&dev_priv->mm.wedged)); - ring->irq_put(ring); + intel_ring_put_irq(ring); } else if (wait_for(i915_seqno_passed(ring->get_seqno(ring), seqno) || atomic_read(&dev_priv->mm.wedged), 3000)) @@ -3306,11 +3306,11 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) * generation is designed to be run atomically and so is * lockless. */ - if (ring->irq_get(ring)) { + if (intel_ring_get_irq(ring)) { ret = wait_event_interruptible(ring->irq_queue, i915_seqno_passed(ring->get_seqno(ring), seqno) || atomic_read(&dev_priv->mm.wedged)); - ring->irq_put(ring); + intel_ring_put_irq(ring); if (ret == 0 && atomic_read(&dev_priv->mm.wedged)) ret = -EIO; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b40004b..b52bbf0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1400,10 +1400,10 @@ static int i915_wait_irq(struct drm_device * dev, int irq_nr) if (master_priv->sarea_priv) master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; - if (ring->irq_get(ring)) { + if (intel_ring_get_irq(ring)) { DRM_WAIT_ON(ret, ring->irq_queue, 3 * DRM_HZ, READ_BREADCRUMB(dev_priv) >= irq_nr); - ring->irq_put(ring); + intel_ring_put_irq(ring); } else if (wait_for(READ_BREADCRUMB(dev_priv) >= irq_nr, 3000)) ret = -EBUSY; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ca70e2f..5b8db53 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -261,6 +261,14 @@ u32 intel_ring_get_active_head(struct intel_ring_buffer *ring) return I915_READ(acthd_reg); } +static void intel_ring_irq_elapsed(unsigned long data) +{ + struct intel_ring_buffer *ring = (struct intel_ring_buffer *)data; + + wake_up_all(&ring->irq_queue); + mod_timer(&ring->irq_timer, jiffies + msecs_to_jiffies(1)); +} + static int init_ring_common(struct intel_ring_buffer *ring) { drm_i915_private_t *dev_priv = ring->dev->dev_private; @@ -325,6 +333,8 @@ static int init_ring_common(struct intel_ring_buffer *ring) ring->space = ring_space(ring); } + setup_timer(&ring->irq_timer, intel_ring_irq_elapsed, + (unsigned long) ring); return 0; } @@ -675,26 +685,17 @@ i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask) POSTING_READ(IMR); } -static bool +static void render_ring_get_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - if (!dev->irq_enabled) - return false; - - spin_lock(&ring->irq_lock); - if (ring->irq_refcount++ == 0) { - if (HAS_PCH_SPLIT(dev)) - ironlake_enable_irq(dev_priv, - GT_PIPE_NOTIFY | GT_USER_INTERRUPT); - else - i915_enable_irq(dev_priv, I915_USER_INTERRUPT); - } - spin_unlock(&ring->irq_lock); - - return true; + if (HAS_PCH_SPLIT(dev)) + ironlake_enable_irq(dev_priv, + GT_PIPE_NOTIFY | GT_USER_INTERRUPT); + else + i915_enable_irq(dev_priv, I915_USER_INTERRUPT); } static void @@ -703,16 +704,12 @@ render_ring_put_irq(struct intel_ring_buffer *ring) struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - spin_lock(&ring->irq_lock); - if (--ring->irq_refcount == 0) { - if (HAS_PCH_SPLIT(dev)) - ironlake_disable_irq(dev_priv, - GT_USER_INTERRUPT | - GT_PIPE_NOTIFY); - else - i915_disable_irq(dev_priv, I915_USER_INTERRUPT); - } - spin_unlock(&ring->irq_lock); + if (HAS_PCH_SPLIT(dev)) + ironlake_disable_irq(dev_priv, + GT_USER_INTERRUPT | + GT_PIPE_NOTIFY); + else + i915_disable_irq(dev_priv, I915_USER_INTERRUPT); } void intel_ring_setup_status_page(struct intel_ring_buffer *ring) @@ -786,24 +783,15 @@ ring_add_request(struct intel_ring_buffer *ring, return 0; } -static bool +static void gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) { struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - if (!dev->irq_enabled) - return false; - - spin_lock(&ring->irq_lock); - if (ring->irq_refcount++ == 0) { - ring->irq_mask &= ~rflag; - I915_WRITE_IMR(ring, ring->irq_mask); - ironlake_enable_irq(dev_priv, gflag); - } - spin_unlock(&ring->irq_lock); - - return true; + ring->irq_mask &= ~rflag; + I915_WRITE_IMR(ring, ring->irq_mask); + ironlake_enable_irq(dev_priv, gflag); } static void @@ -812,49 +800,33 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - spin_lock(&ring->irq_lock); - if (--ring->irq_refcount == 0) { - ring->irq_mask |= rflag; - I915_WRITE_IMR(ring, ring->irq_mask); - ironlake_disable_irq(dev_priv, gflag); - } - spin_unlock(&ring->irq_lock); + ring->irq_mask |= rflag; + I915_WRITE_IMR(ring, ring->irq_mask); + ironlake_disable_irq(dev_priv, gflag); } -static bool +static void bsd_ring_get_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - if (!dev->irq_enabled) - return false; - - spin_lock(&ring->irq_lock); - if (ring->irq_refcount++ == 0) { - if (IS_G4X(dev)) - i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT); - else - ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT); - } - spin_unlock(&ring->irq_lock); - - return true; + if (IS_G4X(dev)) + i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT); + else + ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT); } + static void bsd_ring_put_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - spin_lock(&ring->irq_lock); - if (--ring->irq_refcount == 0) { - if (IS_G4X(dev)) - i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT); - else - ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT); - } - spin_unlock(&ring->irq_lock); + if (IS_G4X(dev)) + i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT); + else + ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT); } static int @@ -1055,6 +1027,12 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) if (ring->obj == NULL) return; + /* The timer should have been removed during irq_put, however + * that was done asynchronously. So redo it in case it has not + * yet expired. + */ + del_timer_sync(&ring->irq_timer); + /* Disable the ring buffer. The ring must be idle at this point */ dev_priv = ring->dev->dev_private; ret = intel_wait_ring_idle(ring); @@ -1274,36 +1252,36 @@ gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, return 0; } -static bool +static void gen6_render_ring_get_irq(struct intel_ring_buffer *ring) { - return gen6_ring_get_irq(ring, - GT_USER_INTERRUPT, - GEN6_RENDER_USER_INTERRUPT); + gen6_ring_get_irq(ring, + GT_USER_INTERRUPT, + GEN6_RENDER_USER_INTERRUPT); } static void gen6_render_ring_put_irq(struct intel_ring_buffer *ring) { - return gen6_ring_put_irq(ring, - GT_USER_INTERRUPT, - GEN6_RENDER_USER_INTERRUPT); + gen6_ring_put_irq(ring, + GT_USER_INTERRUPT, + GEN6_RENDER_USER_INTERRUPT); } -static bool +static void gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring) { - return gen6_ring_get_irq(ring, - GT_GEN6_BSD_USER_INTERRUPT, - GEN6_BSD_USER_INTERRUPT); + gen6_ring_get_irq(ring, + GT_GEN6_BSD_USER_INTERRUPT, + GEN6_BSD_USER_INTERRUPT); } static void gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring) { - return gen6_ring_put_irq(ring, - GT_GEN6_BSD_USER_INTERRUPT, - GEN6_BSD_USER_INTERRUPT); + gen6_ring_put_irq(ring, + GT_GEN6_BSD_USER_INTERRUPT, + GEN6_BSD_USER_INTERRUPT); } /* ring buffer for Video Codec for Gen6+ */ @@ -1329,12 +1307,12 @@ static const struct intel_ring_buffer gen6_bsd_ring = { /* Blitter support (SandyBridge+) */ -static bool +static void blt_ring_get_irq(struct intel_ring_buffer *ring) { - return gen6_ring_get_irq(ring, - GT_BLT_USER_INTERRUPT, - GEN6_BLITTER_USER_INTERRUPT); + gen6_ring_get_irq(ring, + GT_BLT_USER_INTERRUPT, + GEN6_BLITTER_USER_INTERRUPT); } static void @@ -1554,3 +1532,45 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) return intel_init_ring_buffer(dev, ring); } + +bool intel_ring_get_irq(struct intel_ring_buffer *ring) +{ + if (!ring->dev->irq_enabled) + return false; + + if (i915_irq_notify == 0) + return false; + + spin_lock(&ring->irq_lock); + if (ring->irq_refcount++ == 0) { + ring->irq_notify = i915_irq_notify; + + if (ring->irq_notify & I915_IRQ_NOTIFY_INTERRUPT) + ring->irq_get(ring); + + /* Install a safety net. In the event of the + * unthinkable that we experience incoherency + * between the seqno and its notification interrupt, + * we resort to polling. + */ + if (ring->irq_notify & I915_IRQ_NOTIFY_POLL) + mod_timer(&ring->irq_timer, + jiffies + msecs_to_jiffies(1)); + } + spin_unlock(&ring->irq_lock); + + return true; +} + +void intel_ring_put_irq(struct intel_ring_buffer *ring) +{ + spin_lock(&ring->irq_lock); + if (--ring->irq_refcount == 0) { + if (ring->irq_notify & I915_IRQ_NOTIFY_POLL) + del_timer(&ring->irq_timer); + + if (ring->irq_notify & I915_IRQ_NOTIFY_INTERRUPT) + ring->irq_put(ring); + } + spin_unlock(&ring->irq_lock); +} diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 68281c9..eb60f12 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -54,12 +54,14 @@ struct intel_ring_buffer { spinlock_t irq_lock; u32 irq_refcount; + struct timer_list irq_timer; u32 irq_mask; u32 irq_seqno; /* last seq seem at irq time */ + unsigned int irq_notify; u32 trace_irq_seqno; u32 waiting_seqno; u32 sync_seqno[I915_NUM_RINGS-1]; - bool __must_check (*irq_get)(struct intel_ring_buffer *ring); + void (*irq_get)(struct intel_ring_buffer *ring); void (*irq_put)(struct intel_ring_buffer *ring); int (*init)(struct intel_ring_buffer *ring); @@ -193,9 +195,12 @@ int intel_init_blt_ring_buffer(struct drm_device *dev); u32 intel_ring_get_active_head(struct intel_ring_buffer *ring); void intel_ring_setup_status_page(struct intel_ring_buffer *ring); +bool __must_check intel_ring_get_irq(struct intel_ring_buffer *ring); +void intel_ring_put_irq(struct intel_ring_buffer *ring); + static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno) { - if (ring->trace_irq_seqno == 0 && ring->irq_get(ring)) + if (ring->trace_irq_seqno == 0 && intel_ring_get_irq(ring)) ring->trace_irq_seqno = seqno; } -- 1.7.7.3