On Wed, Sep 13, 2017 at 07:18:44PM +0100, Chris Wilson wrote: > The goal here is to trim an excess posting read and keep the predicates > tight (reusing the same predicate throughout for GT ack/handling). > > add/remove: 0/0 grow/shrink: 2/1 up/down: 26/-30 (-4) > function old new delta > gen8_gt_irq_handler 282 301 +19 > cherryview_irq_handler 450 457 +7 > gen8_irq_handler 1653 1623 -30 > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_irq.c | 54 +++++++++++++++++++++++------------------ > 1 file changed, 30 insertions(+), 24 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 91a2c5dbf2da..e12321cb7403 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1375,31 +1375,34 @@ static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, > } > > static void gen8_gt_irq_handler(struct drm_i915_private *dev_priv, > - u32 gt_iir[4]) > + u32 master_ctl, u32 gt_iir[4]) > { > - if (gt_iir[0]) { > + if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { > gen8_cs_irq_handler(dev_priv->engine[RCS], > gt_iir[0], GEN8_RCS_IRQ_SHIFT); > gen8_cs_irq_handler(dev_priv->engine[BCS], > gt_iir[0], GEN8_BCS_IRQ_SHIFT); > } > > - if (gt_iir[1]) { > + if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { > gen8_cs_irq_handler(dev_priv->engine[VCS], > gt_iir[1], GEN8_VCS1_IRQ_SHIFT); > gen8_cs_irq_handler(dev_priv->engine[VCS2], > gt_iir[1], GEN8_VCS2_IRQ_SHIFT); > } > > - if (gt_iir[3]) > + if (master_ctl & GEN8_GT_VECS_IRQ) { > gen8_cs_irq_handler(dev_priv->engine[VECS], > gt_iir[3], GEN8_VECS_IRQ_SHIFT); > + } > > - if (gt_iir[2] & dev_priv->pm_rps_events) > - gen6_rps_irq_handler(dev_priv, gt_iir[2]); > + if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { > + if (gt_iir[2] & dev_priv->pm_rps_events) > + gen6_rps_irq_handler(dev_priv, gt_iir[2]); > > - if (gt_iir[2] & dev_priv->pm_guc_events) > - gen9_guc_irq_handler(dev_priv, 
gt_iir[2]); > + if (gt_iir[2] & dev_priv->pm_guc_events) > + gen9_guc_irq_handler(dev_priv, gt_iir[2]); > + } > } > > static bool bxt_port_hotplug_long_detect(enum port port, u32 val) > @@ -1984,7 +1987,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) > I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); > POSTING_READ(GEN8_MASTER_IRQ); > > - gen8_gt_irq_handler(dev_priv, gt_iir); > + gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir); > > if (hotplug_status) > i9xx_hpd_irq_handler(dev_priv, hotplug_status); > @@ -2518,36 +2521,39 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) > return ret; > } > > +#define GEN8_GT_IRQ_BITS (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ | \ > + GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ | \ > + GEN8_GT_VECS_IRQ | GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ) > + > static irqreturn_t gen8_irq_handler(int irq, void *arg) > { > - struct drm_device *dev = arg; > - struct drm_i915_private *dev_priv = to_i915(dev); > - u32 master_ctl; > - u32 gt_iir[4] = {}; > - irqreturn_t ret; > + struct drm_i915_private *dev_priv = arg; > + u32 master_ctl, gt_iir[4]; > + irqreturn_t ret = IRQ_NONE; > > if (!intel_irqs_enabled(dev_priv)) > return IRQ_NONE; > > - master_ctl = I915_READ_FW(GEN8_MASTER_IRQ); > - master_ctl &= ~GEN8_MASTER_IRQ_CONTROL; > + master_ctl = I915_READ_FW(GEN8_MASTER_IRQ) & ~GEN8_MASTER_IRQ_CONTROL; > if (!master_ctl) > return IRQ_NONE; > > I915_WRITE_FW(GEN8_MASTER_IRQ, 0); > > - /* IRQs are synced during runtime_suspend, we don't require a wakeref */ > - disable_rpm_wakeref_asserts(dev_priv); > - > /* Find, clear, then process each source of interrupt */ > - ret = gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir); > - gen8_gt_irq_handler(dev_priv, gt_iir); > - ret |= gen8_de_irq_handler(dev_priv, master_ctl); > + if (master_ctl & GEN8_GT_IRQ_BITS) > + ret |= gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir); > + > + if (master_ctl & ~GEN8_GT_IRQ_BITS) { > + disable_rpm_wakeref_asserts(dev_priv); Hmm. 
Why is this needed for DE interrupts but not for GT interrupts? Is it just the _FW() vs. non-_FW() register accesses in the two codepaths? If I'm reading things right, we still have some non-_FW() accesses in the RPS handler at least. Does BDW+ no longer suffer from the "hang when accessing the same cacheline from multiple CPUs" issue? > + ret |= gen8_de_irq_handler(dev_priv, master_ctl); > + enable_rpm_wakeref_asserts(dev_priv); > + } This reminds me that I'd still like to split the DE handling into separate ack and handle steps as well. > > I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); > - POSTING_READ_FW(GEN8_MASTER_IRQ); > > - enable_rpm_wakeref_asserts(dev_priv); > + if (master_ctl & GEN8_GT_IRQ_BITS) > + gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir); > > return ret; > } > -- > 2.14.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx