Re: [PATCH 2/2] drm/i915: cleanup TLB invalidation code

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 07/12/2022 17:36, Andrzej Hajda wrote:
Whole register/bit selection logic has been moved to separate helper.

Why is missing.

Signed-off-by: Andrzej Hajda <andrzej.hajda@xxxxxxxxx>
---
  drivers/gpu/drm/i915/gt/intel_gt.c | 136 +++++++++++------------------
  1 file changed, 51 insertions(+), 85 deletions(-)

Diffstat suggests because more streamlined code. Any other reason?

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index f0224b607aa4a7..05520ec3264db8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1003,32 +1003,59 @@ void intel_gt_info_print(const struct intel_gt_info *info,
  	intel_sseu_dump(&info->sseu, p);
  }
-struct reg_and_bit {
+struct reg_and_bits {
  	union {
  		i915_reg_t reg;
  		i915_mcr_reg_t mcr_reg;
  	};
-	u32 bit;
+	u32 bits;
  };
-static struct reg_and_bit
-get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
-		const i915_reg_t *regs, const unsigned int num)
+static struct reg_and_bits
+get_tlb_inv_reg_and_bits(const struct intel_engine_cs *engine, bool write)
  {
+	static const i915_reg_t gen8_regs[MAX_ENGINE_CLASS + 1] = {
+		[RENDER_CLASS]			= GEN8_RTCR,
+		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR,
+		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
+		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
+	};
+	static const i915_reg_t gen12_regs[MAX_ENGINE_CLASS + 1] = {
+		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
+		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
+		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
+		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
+		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
+	};
+	static const i915_mcr_reg_t xehp_regs[MAX_ENGINE_CLASS + 1] = {
+		[RENDER_CLASS]			= XEHP_GFX_TLB_INV_CR,
+		[VIDEO_DECODE_CLASS]		= XEHP_VD_TLB_INV_CR,
+		[VIDEO_ENHANCEMENT_CLASS]	= XEHP_VE_TLB_INV_CR,
+		[COPY_ENGINE_CLASS]		= XEHP_BLT_TLB_INV_CR,
+		[COMPUTE_CLASS]			= XEHP_COMPCTX_TLB_INV_CR,
+	};
  	const unsigned int class = engine->class;
-	struct reg_and_bit rb = { };
+	struct reg_and_bits rb = { .bits = BIT(engine->instance) };
- if (drm_WARN_ON_ONCE(&engine->i915->drm,
-			     class >= num || !regs[class].reg))
+	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+		rb.mcr_reg = xehp_regs[class];
+	else if (GRAPHICS_VER(engine->i915) >= 12)
+		rb.reg = gen12_regs[class];
+	else if (GRAPHICS_VER(engine->i915) >= 8)
+		rb.reg = gen8_regs[class];
+
+	if (drm_WARN_ON_ONCE(&engine->i915->drm, !i915_mmio_reg_offset(rb.reg)))

I'd prefer user readable message was kept but not a blocker.

  		return rb;
- rb.reg = regs[class];
-	if (gen8 && class == VIDEO_DECODE_CLASS)
-		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
-	else
-		rb.bit = engine->instance;
+	if (GRAPHICS_VER(engine->i915) < 12 && class == VIDEO_DECODE_CLASS) {
+		rb.bits = 1;
+		rb.reg.reg += 4 * engine->instance;

No reason to drop the comment IMO. It explains things somewhat, or at least provides a hint.

+	}
- rb.bit = BIT(rb.bit);
+	if (write && GRAPHICS_VER(engine->i915) >= 12 &&
+	    (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS ||
+	     class == COMPUTE_CLASS))
+		rb.bits = _MASKED_BIT_ENABLE(rb.bits);

This could be else if to not have < 12 followed by explicit >= 12, but perhaps it is clearer like this, to signify it's two completely separate quirks.

Also, I would perhaps consider having a local i915 since there's a good number of engine->i915, but it's up to you what looks nicer.

return rb;
  }
@@ -1046,14 +1073,14 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
   * but are now considered MCR registers.  Since they exist within a GAM range,
   * the primary instance of the register rolls up the status from each unit.
   */
-static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
+static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bits rb)
  {
  	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-		return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
+		return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bits, 0,
  						 TLB_INVAL_TIMEOUT_US,
  						 TLB_INVAL_TIMEOUT_MS);
  	else
-		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
+		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bits, 0,
  						    TLB_INVAL_TIMEOUT_US,
  						    TLB_INVAL_TIMEOUT_MS,
  						    NULL);
@@ -1061,50 +1088,14 @@ static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
static void mmio_invalidate_full(struct intel_gt *gt)
  {
-	static const i915_reg_t gen8_regs[] = {
-		[RENDER_CLASS]			= GEN8_RTCR,
-		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
-		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
-		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
-	};
-	static const i915_reg_t gen12_regs[] = {
-		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
-		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
-		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
-		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
-		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
-	};
-	static const i915_mcr_reg_t xehp_regs[] = {
-		[RENDER_CLASS]			= XEHP_GFX_TLB_INV_CR,
-		[VIDEO_DECODE_CLASS]		= XEHP_VD_TLB_INV_CR,
-		[VIDEO_ENHANCEMENT_CLASS]	= XEHP_VE_TLB_INV_CR,
-		[COPY_ENGINE_CLASS]		= XEHP_BLT_TLB_INV_CR,
-		[COMPUTE_CLASS]			= XEHP_COMPCTX_TLB_INV_CR,
-	};
  	struct drm_i915_private *i915 = gt->i915;
  	struct intel_uncore *uncore = gt->uncore;
  	struct intel_engine_cs *engine;
  	intel_engine_mask_t awake, tmp;
  	enum intel_engine_id id;
-	const i915_reg_t *regs;
-	unsigned int num = 0;
  	unsigned long flags;
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
-		regs = NULL;
-		num = ARRAY_SIZE(xehp_regs);
-	} else if (GRAPHICS_VER(i915) == 12) {
-		regs = gen12_regs;
-		num = ARRAY_SIZE(gen12_regs);
-	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
-		regs = gen8_regs;
-		num = ARRAY_SIZE(gen8_regs);
-	} else if (GRAPHICS_VER(i915) < 8) {
-		return;
-	}
-
-	if (drm_WARN_ONCE(&i915->drm, !num,
-			  "Platform does not implement TLB invalidation!"))
+	if (GRAPHICS_VER(i915) < 8)
  		return;
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
@@ -1114,33 +1105,15 @@ static void mmio_invalidate_full(struct intel_gt *gt)
awake = 0;
  	for_each_engine(engine, gt, id) {
-		struct reg_and_bit rb;
+		struct reg_and_bits rb = get_tlb_inv_reg_and_bits(engine, true);

Ugh so actually what was a once per invalidation lookup is now repeated per engine, times two. I wonder if we can do this better. Lets think about it a bit.

Regards,

Tvrtko

if (!intel_engine_pm_is_awake(engine))
  			continue;
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
-			u32 val = BIT(engine->instance);
-
-			if (engine->class == VIDEO_DECODE_CLASS ||
-			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
-			    engine->class == COMPUTE_CLASS)
-				val = _MASKED_BIT_ENABLE(val);
-			intel_gt_mcr_multicast_write_fw(gt,
-							xehp_regs[engine->class],
-							val);
-		} else {
-			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-			if (!i915_mmio_reg_offset(rb.reg))
-				continue;
-
-			if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
-			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
-			    engine->class == COMPUTE_CLASS))
-				rb.bit = _MASKED_BIT_ENABLE(rb.bit);
-
-			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
-		}
+		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+			intel_gt_mcr_multicast_write_fw(gt, rb.mcr_reg, rb.bits);
+		else
+			intel_uncore_write_fw(uncore, rb.reg, rb.bits);
  		awake |= engine->mask;
  	}
@@ -1159,14 +1132,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
  	intel_gt_mcr_unlock(gt, flags);
for_each_engine_masked(engine, gt, awake, tmp) {
-		struct reg_and_bit rb;
-
-		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
-			rb.mcr_reg = xehp_regs[engine->class];
-			rb.bit = BIT(engine->instance);
-		} else {
-			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-		}
+		struct reg_and_bits rb = get_tlb_inv_reg_and_bits(engine, false);
if (wait_for_invalidate(gt, rb))
  			drm_err_ratelimited(&gt->i915->drm,



[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux