On 9/28/2022 11:07 AM, Matt Roper wrote:
On Tue, Sep 27, 2022 at 05:22:41PM -0700, Ceraolo Spurio, Daniele wrote:
On 9/27/2022 5:10 PM, Matt Roper wrote:
On Thu, Sep 22, 2022 at 03:11:17PM -0700, Daniele Ceraolo Spurio wrote:
The render and media GuCs share the same interrupt enable register, so
we can no longer disable interrupts when we disable communication for
one of the GuCs as this would impact the other GuC. Instead, we keep the
interrupts always enabled in HW and use a variable in the GuC structure
to determine if we want to service the received interrupts or not.
Even if they have a unified enable bit, can't we still just update the
per-GuC mask bit to get the same behavior (i.e., no interrupts
delivered to the host for that specific GuC)?
We could yes, but we've avoided dynamically using masks for gen11+ because
it can mess with rc6 (e.g., see
https://patchwork.freedesktop.org/patch/207829/).
+Cc Mika & Tvrtko in case they remember more historic details.
Is that expected/documented behavior? Or is it an unlabelled workaround
that might not be an issue anymore on newer platforms? Also, it looks
like that patch only applies to RING_IMR and doesn't necessarily impact
other interrupt masking such as the GuC mask.
It was documented, though don't ask me where because it was an ICL doc
and it's been too long :).
IIRC the issue is that enabled but masked interrupts are still
accumulated (and immediately triggered once the mask is cleared) and
they can keep the power up until they're actually serviced.
The code today (which seems to be in use without problem on both gen12
and xehp) is setting all mask bits in GEN11_GUC_SG_INTR_MASK and only
clearing the single G2H bit at the point G2H interrupts are enabled.
GEN11_GUC_SG_INTR_MASK has now become GEN12_GUC_MGUC_INTR_MASK, but it
seems like keeping the masking logic the same as we've been using on
gen12 and xehp would be fine if we just never clear the enable bit?
The main difference is that with the current gen12 code the interrupts
are enabled and unmasked at the same time, so the situation above
(pending interrupt stuck behind the mask keeping the power up) can never
happen, while it can if we keep interrupts always enabled and start
using the masks dynamically. We usually only disable and re-enable
interrupts when the GuC is dead so it's not likely that we get an
interrupt stuck behind the mask, but I'm not confident that all paths
are safe (particularly around suspend). Instead of sanitizing all
possible paths to make sure they're ok, keeping the interrupts enabled
and dropping the rare unexpected ones seemed simpler to me.
Daniele
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
Cc: Matt Roper <matthew.d.roper@xxxxxxxxx>
Cc: John Harrison <John.C.Harrison@xxxxxxxxx>
Cc: Alan Previn <alan.previn.teres.alexis@xxxxxxxxx>
---
drivers/gpu/drm/i915/gt/intel_gt_irq.c | 21 ++++++++++++++----
drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++
drivers/gpu/drm/i915/gt/uc/intel_guc.c | 29 ++++++++++++++-----------
drivers/gpu/drm/i915/gt/uc/intel_guc.h | 5 ++++-
drivers/gpu/drm/i915/gt/uc/intel_uc.c | 8 +++++--
5 files changed, 45 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f26882fdc24c..e33ed9ae1439 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -17,6 +17,9 @@
static void guc_irq_handler(struct intel_guc *guc, u16 iir)
{
+ if (unlikely(!guc->interrupts.enabled))
+ return;
+
if (iir & GUC_INTR_GUC2HOST)
intel_guc_to_host_event_handler(guc);
}
@@ -249,6 +252,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
u32 irqs = GT_RENDER_USER_INTERRUPT;
+ u32 guc_mask = intel_uc_wants_guc(>->uc) ? GUC_INTR_GUC2HOST : 0;
const u32 gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1);
u32 dmask;
u32 smask;
@@ -299,6 +303,19 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
if (HAS_HECI_GSC(gt->i915))
intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~gsc_mask);
+ if (guc_mask) {
+ /* the enable bit is common for both GTs but the masks are separate */
+ u32 mask = gt->type == GT_MEDIA ?
+ REG_FIELD_PREP(ENGINE0_MASK, guc_mask) :
+ REG_FIELD_PREP(ENGINE1_MASK, guc_mask);
+
+ intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE,
+ REG_FIELD_PREP(ENGINE1_MASK, guc_mask));
+
+ /* we might not be the first GT to write this reg */
+ intel_uncore_rmw(uncore, GEN12_GUC_MGUC_INTR_MASK, mask, 0);
+ }
+
/*
* RPS interrupts will get enabled/disabled on demand when RPS itself
* is enabled/disabled.
@@ -307,10 +324,6 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
gt->pm_imr = ~gt->pm_ier;
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0);
-
- /* Same thing for GuC interrupts */
- intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
- intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0);
}
void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 1cbb7226400b..792809e49680 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1519,6 +1519,7 @@
#define GEN11_CSME (31)
#define GEN11_GUNIT (28)
#define GEN11_GUC (25)
+#define GEN12_GUCM (24)
#define GEN11_WDPERF (20)
#define GEN11_KCR (19)
#define GEN11_GTPM (16)
@@ -1573,6 +1574,7 @@
#define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0)
#define GEN12_VECS2_VECS3_INTR_MASK _MMIO(0x1900d4)
#define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8)
+#define GEN12_GUC_MGUC_INTR_MASK _MMIO(0x1900e8) /* MTL+ */
Technically we should probably give this a "MTL_" prefix or something
since we're not referring to new platforms as "gen12" anymore.
ok. Should I change GEN12_GUCM as well?
Yeah, at this point "gen12" is a historic term that only covers the
pre-Xe_HP platforms and we've been asked not to use it for any newer
platforms, even if they happen to have an IP version with a major number
of "12."
#define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec)
#define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0)
#define GEN11_GUNIT_CSME_INTR_MASK _MMIO(0x1900f4)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index b0beab44b34c..ab0263d8e1cf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -98,6 +98,8 @@ static void gen9_enable_guc_interrupts(struct intel_guc *guc)
gt->pm_guc_events);
gen6_gt_pm_enable_irq(gt, gt->pm_guc_events);
spin_unlock_irq(gt->irq_lock);
+
+ guc->interrupts.enabled = true;
}
static void gen9_disable_guc_interrupts(struct intel_guc *guc)
@@ -105,6 +107,7 @@ static void gen9_disable_guc_interrupts(struct intel_guc *guc)
struct intel_gt *gt = guc_to_gt(guc);
assert_rpm_wakelock_held(>->i915->runtime_pm);
+ guc->interrupts.enabled = false;
spin_lock_irq(gt->irq_lock);
@@ -116,39 +119,39 @@ static void gen9_disable_guc_interrupts(struct intel_guc *guc)
gen9_reset_guc_interrupts(guc);
}
+static bool __gen11_reset_guc_interrupts(struct intel_gt *gt)
+{
+ u32 irq = gt->type == GT_MEDIA ? GEN12_GUCM : GEN11_GUC;
+
+ lockdep_assert_held(gt->irq_lock);
+ return gen11_gt_reset_one_iir(gt, 0, irq);
+}
+
static void gen11_reset_guc_interrupts(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
spin_lock_irq(gt->irq_lock);
- gen11_gt_reset_one_iir(gt, 0, GEN11_GUC);
+ __gen11_reset_guc_interrupts(gt);
spin_unlock_irq(gt->irq_lock);
}
static void gen11_enable_guc_interrupts(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
spin_lock_irq(gt->irq_lock);
- WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC));
- intel_uncore_write(gt->uncore,
- GEN11_GUC_SG_INTR_ENABLE, events);
- intel_uncore_write(gt->uncore,
- GEN11_GUC_SG_INTR_MASK, ~events);
The modified postinstall left us with GUC2HOST enabled but masked.
Don't we still need to clear the mask so the interrupts will start being
delivered?
The postinstall does:
intel_uncore_rmw(uncore, GEN12_GUC_MGUC_INTR_MASK, mask, 0);
which clears the "mask" (i.e. the G2H interrupt shifted based on which GuC
it is) in the mask register, so the G2H is allowed.
Woops, yeah. I got the order of the rmw parameters mixed up here.
Matt
Daniele
Matt
+ __gen11_reset_guc_interrupts(gt);
spin_unlock_irq(gt->irq_lock);
+
+ guc->interrupts.enabled = true;
}
static void gen11_disable_guc_interrupts(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- spin_lock_irq(gt->irq_lock);
-
- intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0);
- intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
-
- spin_unlock_irq(gt->irq_lock);
+ guc->interrupts.enabled = false;
intel_synchronize_irq(gt->i915);
gen11_reset_guc_interrupts(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 804133df1ac9..061d55de3a94 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -78,6 +78,7 @@ struct intel_guc {
/** @interrupts: pointers to GuC interrupt-managing functions. */
struct {
+ bool enabled;
void (*reset)(struct intel_guc *guc);
void (*enable)(struct intel_guc *guc);
void (*disable)(struct intel_guc *guc);
@@ -316,9 +317,11 @@ static inline int intel_guc_send_busy_loop(struct intel_guc *guc,
return err;
}
+/* Only call this from the interrupt handler code */
static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
{
- intel_guc_ct_event_handler(&guc->ct);
+ if (guc->interrupts.enabled)
+ intel_guc_ct_event_handler(&guc->ct);
}
/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4cd8a787f9e5..1d28286e6f06 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -636,8 +636,10 @@ void intel_uc_runtime_suspend(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
- if (!intel_guc_is_ready(guc))
+ if (!intel_guc_is_ready(guc)) {
+ guc->interrupts.enabled = false;
return;
+ }
/*
* Wait for any outstanding CTB before tearing down communication /w the
@@ -657,8 +659,10 @@ void intel_uc_suspend(struct intel_uc *uc)
intel_wakeref_t wakeref;
int err;
- if (!intel_guc_is_ready(guc))
+ if (!intel_guc_is_ready(guc)) {
+ guc->interrupts.enabled = false;
return;
+ }
with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) {
err = intel_guc_suspend(guc);
--
2.37.3