Re: [PATCH v2] drm/i915: Trim gen11_gt_irq_handler

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 08/03/18 17:33, Chris Wilson wrote:
Give the compiler a helping hand in mapping (bank,bit) to our struct
intel_engine_cs by trading object code size for data cache:

add/remove: 2/0 grow/shrink: 0/1 up/down: 48/-135 (-87)
Function                                     old     new   delta
bank1_map                                      -      32     +32
bank0_map                                      -      16     +16
gen11_irq_handler                            706     571    -135

v2: u8 arrays for tighter packing

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_irq.c | 53 +++++++++++------------------------------
  1 file changed, 14 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c8c29d8ecbab..e423ec58e5d2 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2762,44 +2762,6 @@ static void __fini_wedge(struct wedge_me *w)
  	     (W)->i915;							\
  	     __fini_wedge((W)))
-static void
-gen11_gt_engine_irq_handler(struct drm_i915_private * const i915,
-			    const unsigned int bank,
-			    const unsigned int engine_n,
-			    const u16 iir)
-{
-	struct intel_engine_cs ** const engine = i915->engine;
-
-	switch (bank) {
-	case 0:
-		switch (engine_n) {
-
-		case GEN11_RCS0:
-			return gen8_cs_irq_handler(engine[RCS], iir);
-
-		case GEN11_BCS:
-			return gen8_cs_irq_handler(engine[BCS], iir);
-		}
-	case 1:
-		switch (engine_n) {
-
-		case GEN11_VCS(0):
-			return gen8_cs_irq_handler(engine[_VCS(0)], iir);
-		case GEN11_VCS(1):
-			return gen8_cs_irq_handler(engine[_VCS(1)], iir);
-		case GEN11_VCS(2):
-			return gen8_cs_irq_handler(engine[_VCS(2)], iir);
-		case GEN11_VCS(3):
-			return gen8_cs_irq_handler(engine[_VCS(3)], iir);
-
-		case GEN11_VECS(0):
-			return gen8_cs_irq_handler(engine[_VECS(0)], iir);
-		case GEN11_VECS(1):
-			return gen8_cs_irq_handler(engine[_VECS(1)], iir);
-		}
-	}
-}
-
  static u32
  gen11_gt_engine_intr(struct drm_i915_private * const i915,
  		     const unsigned int bank, const unsigned int bit)
@@ -2836,10 +2798,23 @@ static void
  gen11_gt_irq_handler(struct drm_i915_private * const i915,
  		     const u32 master_ctl)
  {
+	static const u8 bank0_map[] = {
+		[GEN11_RCS0] = RCS,
+		[GEN11_BCS]  = BCS,
+	};
+	static const u8 bank1_map[] = {
+		[GEN11_VCS(0)]  = _VCS(0),
+		[GEN11_VCS(1)]  = _VCS(1),
+		[GEN11_VCS(2)]  = _VCS(2),
+		[GEN11_VCS(3)]  = _VCS(3),
+		[GEN11_VECS(0)] = _VECS(0),
+		[GEN11_VECS(1)] = _VECS(1),
+	};
  	void __iomem * const regs = i915->regs;
  	unsigned int bank;
for (bank = 0; bank < 2; bank++) {
+		const u8 *map = bank ? bank1_map : bank0_map;
  		unsigned long intr_dw;
  		unsigned int bit;
@@ -2859,7 +2834,7 @@ gen11_gt_irq_handler(struct drm_i915_private * const i915,
  			if (unlikely(!iir))
  				continue;
- gen11_gt_engine_irq_handler(i915, bank, bit, iir);
+			gen8_cs_irq_handler(i915->engine[map[bit]], iir);

With guc and PM interrupts we get a couple of non-engine interrupts on bank0, so this may not scale very well depending how we approach it.

My vote still goes to deriving class and instance from the register (bits 16-18 and 20-25 respectively). If we return the full register value from gen11_gt_engine_intr, we can then do something like:

for_each_set_bit(bit, &intr_dw, 32) {
        const u32 ident = gen11_gt_engine_intr(i915, bank, bit);
        u16 iir = ident & GEN11_INTR_ENGINE_MASK;
        u8 class, instance;

        if (unlikely(!iir))
                continue;

        class = (ident >> 16) & (BIT(3) - 1);
        instance = (ident >> 20) &
                (BIT(GEN11_ENGINE_INSTANCE_WIDTH) - 1);

        gen8_cs_irq_handler(i915->engine_class[class][instance], iir);
}

Which saves a bit more code and is more adaptable to non-engine interrupts IMHO, since we have the class and all non-engine interrupts come under class 4.

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-137 (-137)
Function                                     old     new   delta
gen11_irq_handler                            711     574    -137
Total: Before=1487912, After=1487775, chg -0.01%

Downside is that we have a few more instructions in an hot path.

Thanks,
Daniele

  		}
/* Clear must be after shared has been served for engine */

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux