Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > The compiler is not automatically caching the i915->regs address inside > a register and emitting a load for every mmio access. For simple > functions like gen8_gt_irq_handler that are already using the raw > accessors, we can open-code them for substantial savings: > > add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-83 (-83) > Function old new delta > gen8_gt_irq_handler 290 266 -24 > gen8_gt_irq_ack 181 122 -59 > Total: Before=954637, After=954554, chg -0.01% > > v2: Add raw_reg_read/raw_reg_write. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > And so begins the long haul of de-I915_READ/WRITE-ing the driver... > --- > drivers/gpu/drm/i915/i915_irq.c | 58 ++++++++++++++++++------------------- > drivers/gpu/drm/i915/intel_uncore.h | 5 ++++ > 2 files changed, 33 insertions(+), 30 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index c7f6b719e86d..17de6cef2a30 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1413,9 +1413,11 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) > tasklet_hi_schedule(&execlists->tasklet); > } > > -static void gen8_gt_irq_ack(struct drm_i915_private *dev_priv, > +static void gen8_gt_irq_ack(struct drm_i915_private *i915, > u32 master_ctl, u32 gt_iir[4]) > { > + void __iomem * const regs = i915->regs; > + > #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \ > GEN8_GT_BCS_IRQ | \ > GEN8_GT_VCS1_IRQ | \ > @@ -1425,62 +1427,58 @@ static void gen8_gt_irq_ack(struct drm_i915_private *dev_priv, > GEN8_GT_GUC_IRQ) > > if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { > - gt_iir[0] = I915_READ_FW(GEN8_GT_IIR(0)); > - if (gt_iir[0]) > - I915_WRITE_FW(GEN8_GT_IIR(0), gt_iir[0]); > + gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0)); > + if (likely(gt_iir[0])) > + raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]); > } > > if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { > - gt_iir[1] = I915_READ_FW(GEN8_GT_IIR(1)); > - if (gt_iir[1]) > - I915_WRITE_FW(GEN8_GT_IIR(1), gt_iir[1]); > + gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1)); > + if (likely(gt_iir[1])) > + raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]); > } > > - if (master_ctl & GEN8_GT_VECS_IRQ) { > - gt_iir[3] = I915_READ_FW(GEN8_GT_IIR(3)); > - if (gt_iir[3]) > - I915_WRITE_FW(GEN8_GT_IIR(3), gt_iir[3]); > + if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { > + gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2)); > + if (likely(gt_iir[2] & (i915->pm_rps_events | > + i915->pm_guc_events))) > + raw_reg_write(regs, GEN8_GT_IIR(2), > + gt_iir[2] & (i915->pm_rps_events | > + i915->pm_guc_events)); I would gone as far as reg_read reg_write but that might be too far :) We leave te events hanging what are of no interest. As we are doing the masks with these so I find it peculiar that we dont ack everything as the masks should be in place and perhaps notify on events appearing without order in place for them. Well it is not in a scope for this patch tho. So, Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > } > > - if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { > - gt_iir[2] = I915_READ_FW(GEN8_GT_IIR(2)); > - if (gt_iir[2] & (dev_priv->pm_rps_events | > - dev_priv->pm_guc_events)) { > - I915_WRITE_FW(GEN8_GT_IIR(2), > - gt_iir[2] & (dev_priv->pm_rps_events | > - dev_priv->pm_guc_events)); > - } > + if (master_ctl & GEN8_GT_VECS_IRQ) { > + gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3)); > + if (likely(gt_iir[3])) > + raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]); > } > } > > -static void gen8_gt_irq_handler(struct drm_i915_private *dev_priv, > +static void gen8_gt_irq_handler(struct drm_i915_private *i915, > u32 master_ctl, u32 gt_iir[4]) > { > if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { > - gen8_cs_irq_handler(dev_priv->engine[RCS], > + gen8_cs_irq_handler(i915->engine[RCS], > gt_iir[0], GEN8_RCS_IRQ_SHIFT); > - gen8_cs_irq_handler(dev_priv->engine[BCS], > + gen8_cs_irq_handler(i915->engine[BCS], > gt_iir[0], GEN8_BCS_IRQ_SHIFT); > } > > if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { > - gen8_cs_irq_handler(dev_priv->engine[VCS], > + gen8_cs_irq_handler(i915->engine[VCS], > gt_iir[1], GEN8_VCS1_IRQ_SHIFT); > - gen8_cs_irq_handler(dev_priv->engine[VCS2], > + gen8_cs_irq_handler(i915->engine[VCS2], > gt_iir[1], GEN8_VCS2_IRQ_SHIFT); > } > > if (master_ctl & GEN8_GT_VECS_IRQ) { > - gen8_cs_irq_handler(dev_priv->engine[VECS], > + gen8_cs_irq_handler(i915->engine[VECS], > gt_iir[3], GEN8_VECS_IRQ_SHIFT); > } > > if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { > - if (gt_iir[2] & dev_priv->pm_rps_events) > - gen6_rps_irq_handler(dev_priv, gt_iir[2]); > - > - if (gt_iir[2] & dev_priv->pm_guc_events) > - gen9_guc_irq_handler(dev_priv, gt_iir[2]); > + gen6_rps_irq_handler(i915, gt_iir[2]); > + gen9_guc_irq_handler(i915, gt_iir[2]); > } > } > > diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h > index bed019ef000f..53ef77d0c97c 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.h > +++ b/drivers/gpu/drm/i915/intel_uncore.h > @@ -198,4 +198,9 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, > 2, timeout_ms, NULL); > } > > +#define raw_reg_read(base, reg) \ > + readl(base + i915_mmio_reg_offset(reg)) > +#define raw_reg_write(base, reg, value) \ > + writel(value, base + i915_mmio_reg_offset(reg)) > + > #endif /* !__INTEL_UNCORE_H__ */ > -- > 2.16.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx