Re: [PATCH 5/6] drm/i915: dynamically allocate forcewake domains

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 19/06/2019 00:06, Daniele Ceraolo Spurio wrote:
On 6/18/19 2:23 AM, Tvrtko Ursulin wrote:
On 17/06/2019 19:09, Daniele Ceraolo Spurio wrote:
We'd like to introduce a display uncore with no forcewake domains, so
let's avoid wasting memory and be ready to allocate only what we need.
Even without multiple uncore, we still don't need all the domains on all
gens.

Looks good in principle, only a few conversation points below.


Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
---
  drivers/gpu/drm/i915/intel_uncore.c | 155 ++++++++++++++++++----------
  drivers/gpu/drm/i915/intel_uncore.h |  13 +--
  2 files changed, 102 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index c0f5567ee096..7f311827c3ef 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -344,7 +344,7 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
  {
      struct intel_uncore_forcewake_domain *domain =
             container_of(timer, struct intel_uncore_forcewake_domain, timer);
-    struct intel_uncore *uncore = forcewake_domain_to_uncore(domain);
+    struct intel_uncore *uncore = domain->uncore;
      unsigned long irqflags;
      assert_rpm_device_not_suspended(uncore->rpm);
@@ -1291,23 +1291,23 @@ do { \
      (uncore)->funcs.read_fw_domains = x##_reg_read_fw_domains; \
  } while (0)
-static void fw_domain_init(struct intel_uncore *uncore,
+static int fw_domain_init(struct intel_uncore *uncore,
                 enum forcewake_domain_id domain_id,
                 i915_reg_t reg_set,
                 i915_reg_t reg_ack)
  {
      struct intel_uncore_forcewake_domain *d;
-    if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT))
-        return;
-
-    d = &uncore->fw_domain[domain_id];
+    GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT);
-    WARN_ON(d->wake_count);

I wonder what was this for.

Do you want to add some protection against double init of the same domain?


just a GEM_BUG_ON maybe? It shouldn't really ever happen unless we're moving something around.

Yes, sounds right.


+    d = kzalloc(sizeof(*d), GFP_KERNEL);
+    if (!d)
+        return -ENOMEM;
      WARN_ON(!i915_mmio_reg_valid(reg_set));
      WARN_ON(!i915_mmio_reg_valid(reg_ack));
+    d->uncore = uncore;
      d->wake_count = 0;
      d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set);
      d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack);
@@ -1324,7 +1324,6 @@ static void fw_domain_init(struct intel_uncore *uncore,
      BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX0));
      BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX1));
-
      d->mask = BIT(domain_id);
      hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -1333,6 +1332,10 @@ static void fw_domain_init(struct intel_uncore *uncore,
      uncore->fw_domains |= BIT(domain_id);
      fw_domain_reset(d);
+
+    uncore->fw_domain[domain_id] = d;
+
+    return 0;
  }
  static void fw_domain_fini(struct intel_uncore *uncore,
@@ -1340,77 +1343,92 @@ static void fw_domain_fini(struct intel_uncore *uncore,
  {
      struct intel_uncore_forcewake_domain *d;
-    if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT))
-        return;
+    GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT);
-    d = &uncore->fw_domain[domain_id];
+    d = fetch_and_zero(&uncore->fw_domain[domain_id]);

Maybe early if !d here and function flow just carries on?


will do.

+    uncore->fw_domains &= ~BIT(domain_id);
-    WARN_ON(d->wake_count);
-    WARN_ON(hrtimer_cancel(&d->timer));
-    memset(d, 0, sizeof(*d));
+    if (d) {
+        WARN_ON(d->wake_count);
+        WARN_ON(hrtimer_cancel(&d->timer));
+        kfree(d);
+    }
+}
-    uncore->fw_domains &= ~BIT(domain_id);
+static void intel_uncore_fw_domains_fini(struct intel_uncore *uncore)
+{
+    struct intel_uncore_forcewake_domain *d;
+    int tmp;
+
+    for_each_fw_domain(d, uncore, tmp)
+        fw_domain_fini(uncore, d->id);
  }
-static void intel_uncore_fw_domains_init(struct intel_uncore *uncore)
+static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
  {
      struct drm_i915_private *i915 = uncore->i915;
+    int ret;
      GEM_BUG_ON(!intel_uncore_has_forcewake(uncore));
+#define __fw_domain_init(id__, set__, ack__) \
+    ret = fw_domain_init(uncore, (id__), (set__), (ack__)); \
+    if (ret) \
+        goto out_clean;

Hidden control flow is slightly objectionable but I don't offer any nice alternatives so I guess will have to pass. Or maybe accumulate the error (err |= fw_domain_init(..)) as you go and then cleanup at the end if any failed?

I'd prefer to avoid accumulating the error since it'd just cause us to have to unroll more domains when we could've stopped early.

Although unrolling happens close to never-ever so it isn't a practical concern..



On the other hand the idea slightly conflicts with my other suggestion to rename existing fw_domain_init to __fw_domain_init and call the macro fw_domain_init and avoid all the churn below.


I'll pick this suggestion among the 2, unless there is another suggestion on how to avoid the hidden goto.

.. but okay, hidden goto is also only a concern if someone adds new init steps before the fw_domain_init steps, oblivious that cleanup is also needed. Perhaps the out_clean label would be enough of a clue for that case.


+
      if (INTEL_GEN(i915) >= 11) {
          int i;
-        uncore->funcs.force_wake_get =
-            fw_domains_get_with_fallback;
+        uncore->funcs.force_wake_get = fw_domains_get_with_fallback;
          uncore->funcs.force_wake_put = fw_domains_put;
-        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
-                   FORCEWAKE_RENDER_GEN9,
-                   FORCEWAKE_ACK_RENDER_GEN9);
-        fw_domain_init(uncore, FW_DOMAIN_ID_BLITTER,
-                   FORCEWAKE_BLITTER_GEN9,
-                   FORCEWAKE_ACK_BLITTER_GEN9);
+        __fw_domain_init(FW_DOMAIN_ID_RENDER,
+                 FORCEWAKE_RENDER_GEN9,
+                 FORCEWAKE_ACK_RENDER_GEN9);
+        __fw_domain_init(FW_DOMAIN_ID_BLITTER,
+                 FORCEWAKE_BLITTER_GEN9,
+                 FORCEWAKE_ACK_BLITTER_GEN9);
+
          for (i = 0; i < I915_MAX_VCS; i++) {
              if (!HAS_ENGINE(i915, _VCS(i)))
                  continue;
-            fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA_VDBOX0 + i,
-                       FORCEWAKE_MEDIA_VDBOX_GEN11(i),
-                       FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i));
+            __fw_domain_init(FW_DOMAIN_ID_MEDIA_VDBOX0 + i,
+                     FORCEWAKE_MEDIA_VDBOX_GEN11(i),
+                     FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i));
          }
          for (i = 0; i < I915_MAX_VECS; i++) {
              if (!HAS_ENGINE(i915, _VECS(i)))
                  continue;
-            fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA_VEBOX0 + i,
-                       FORCEWAKE_MEDIA_VEBOX_GEN11(i),
-                       FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
+            __fw_domain_init(FW_DOMAIN_ID_MEDIA_VEBOX0 + i,
+                     FORCEWAKE_MEDIA_VEBOX_GEN11(i),
+                     FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
          }
      } else if (IS_GEN_RANGE(i915, 9, 10)) {
-        uncore->funcs.force_wake_get =
-            fw_domains_get_with_fallback;
+        uncore->funcs.force_wake_get = fw_domains_get_with_fallback;
          uncore->funcs.force_wake_put = fw_domains_put;
-        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
-                   FORCEWAKE_RENDER_GEN9,
-                   FORCEWAKE_ACK_RENDER_GEN9);
-        fw_domain_init(uncore, FW_DOMAIN_ID_BLITTER,
-                   FORCEWAKE_BLITTER_GEN9,
-                   FORCEWAKE_ACK_BLITTER_GEN9);
-        fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA,
-                   FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9);
+        __fw_domain_init(FW_DOMAIN_ID_RENDER,
+                 FORCEWAKE_RENDER_GEN9,
+                 FORCEWAKE_ACK_RENDER_GEN9);
+        __fw_domain_init(FW_DOMAIN_ID_BLITTER,
+                 FORCEWAKE_BLITTER_GEN9,
+                 FORCEWAKE_ACK_BLITTER_GEN9);
+        __fw_domain_init(FW_DOMAIN_ID_MEDIA,
+                 FORCEWAKE_MEDIA_GEN9,
+                 FORCEWAKE_ACK_MEDIA_GEN9);
      } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
          uncore->funcs.force_wake_get = fw_domains_get;
          uncore->funcs.force_wake_put = fw_domains_put;
-        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
-                   FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
-        fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA,
-                   FORCEWAKE_MEDIA_VLV, FORCEWAKE_ACK_MEDIA_VLV);
+        __fw_domain_init(FW_DOMAIN_ID_RENDER,
+                 FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
+        __fw_domain_init(FW_DOMAIN_ID_MEDIA,
+                 FORCEWAKE_MEDIA_VLV, FORCEWAKE_ACK_MEDIA_VLV);
      } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) {
          uncore->funcs.force_wake_get =
              fw_domains_get_with_thread_status;
          uncore->funcs.force_wake_put = fw_domains_put;
-        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
-                   FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
+        __fw_domain_init(FW_DOMAIN_ID_RENDER,
+                 FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
      } else if (IS_IVYBRIDGE(i915)) {
          u32 ecobus;
@@ -1437,8 +1455,8 @@ static void intel_uncore_fw_domains_init(struct intel_uncore *uncore)
          __raw_uncore_write32(uncore, FORCEWAKE, 0);
          __raw_posting_read(uncore, ECOBUS);
-        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
-                   FORCEWAKE_MT, FORCEWAKE_MT_ACK);
+        __fw_domain_init(FW_DOMAIN_ID_RENDER,
+                 FORCEWAKE_MT, FORCEWAKE_MT_ACK);
          spin_lock_irq(&uncore->lock);
          fw_domains_get_with_thread_status(uncore, FORCEWAKE_RENDER);
@@ -1449,19 +1467,27 @@ static void intel_uncore_fw_domains_init(struct intel_uncore *uncore)
          if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
            DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n");
            DRM_INFO("when using vblank-synced partial screen updates.\n");
-            fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
-                       FORCEWAKE, FORCEWAKE_ACK);
+            __fw_domain_init(FW_DOMAIN_ID_RENDER,
+                     FORCEWAKE, FORCEWAKE_ACK);
          }
      } else if (IS_GEN(i915, 6)) {
          uncore->funcs.force_wake_get =
              fw_domains_get_with_thread_status;
          uncore->funcs.force_wake_put = fw_domains_put;
-        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
-                   FORCEWAKE, FORCEWAKE_ACK);
+        __fw_domain_init(FW_DOMAIN_ID_RENDER,
+                 FORCEWAKE, FORCEWAKE_ACK);
      }
+#undef __fw_domain_init
+
      /* All future platforms are expected to require complex power gating */
      WARN_ON(uncore->fw_domains == 0);
+
+    return 0;
+
+out_clean:
+    intel_uncore_fw_domains_fini(uncore);
+    return ret;
  }
  #define ASSIGN_FW_DOMAINS_TABLE(uncore, d) \
@@ -1562,13 +1588,17 @@ static void uncore_raw_init(struct intel_uncore *uncore)
      }
  }
-static void uncore_forcewake_init(struct intel_uncore *uncore)
+static int uncore_forcewake_init(struct intel_uncore *uncore)
  {
      struct drm_i915_private *i915 = uncore->i915;
+    int ret;
      GEM_BUG_ON(!intel_uncore_has_forcewake(uncore));
-    intel_uncore_fw_domains_init(uncore);
+    ret = intel_uncore_fw_domains_init(uncore);
+    if (ret)
+        return ret;
+
      forcewake_early_sanitize(uncore, 0);
      if (IS_GEN_RANGE(i915, 6, 7)) {
@@ -1601,6 +1631,8 @@ static void uncore_forcewake_init(struct intel_uncore *uncore)
      uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier;
iosf_mbi_register_pmic_bus_access_notifier(&uncore->pmic_bus_access_nb);
+
+    return 0;
  }
  int intel_uncore_init_mmio(struct intel_uncore *uncore)
@@ -1619,10 +1651,13 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
      uncore->unclaimed_mmio_check = 1;
-    if (!intel_uncore_has_forcewake(uncore))
+    if (!intel_uncore_has_forcewake(uncore)) {
          uncore_raw_init(uncore);
-    else
-        uncore_forcewake_init(uncore);
+    } else {
+        ret = uncore_forcewake_init(uncore);
+        if (ret)
+            goto out_mmio_cleanup;
+    }
      /* make sure fw funcs are set if and only if we have fw*/
      GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != !!uncore->funcs.force_wake_get);
@@ -1644,6 +1679,11 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
          DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
      return 0;
+
+out_mmio_cleanup:
+    uncore_mmio_cleanup(uncore);
+
+    return ret;
  }
  /*
@@ -1689,6 +1729,7 @@ void intel_uncore_fini_mmio(struct intel_uncore *uncore)
          iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(
              &uncore->pmic_bus_access_nb);
          intel_uncore_forcewake_reset(uncore);
+        intel_uncore_fw_domains_fini(uncore);
          iosf_mbi_punit_release();
      }
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 879252735bba..bbff281b880d 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -126,6 +126,7 @@ struct intel_uncore {
      enum forcewake_domains fw_domains_saved; /* user domains saved for S3 */
      struct intel_uncore_forcewake_domain {
+        struct intel_uncore *uncore;
          enum forcewake_domain_id id;
          enum forcewake_domains mask;
          unsigned int wake_count;
@@ -133,7 +134,7 @@ struct intel_uncore {
          struct hrtimer timer;
          u32 __iomem *reg_set;
          u32 __iomem *reg_ack;
-    } fw_domain[FW_DOMAIN_ID_COUNT];
+    } *fw_domain[FW_DOMAIN_ID_COUNT];

The array itself could also be made dynamic, no? Too much hassle for very little gain?


With the current approach we don't know how many domains we have in advance, so we'd have to either add a table for that or realloc the array as we increase the number of domains. Both options don't seem worth the hassle IMO.

Agreed. It would also need changes in lookup so it looks even less appealing than it did to me yesterday. :)

Regards,

Tvrtko



Thanks,
Daniele

      struct {
          unsigned int count;
@@ -147,18 +148,12 @@ struct intel_uncore {
  /* Iterate over initialised fw domains */
  #define for_each_fw_domain_masked(domain__, mask__, uncore__, tmp__) \
-    for (tmp__ = (mask__); \
-         tmp__ ? (domain__ = &(uncore__)->fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
+    for (tmp__ = (mask__); tmp__ ;) \
+        for_each_if(domain__ = (uncore__)->fw_domain[__mask_next_bit(tmp__)])
  #define for_each_fw_domain(domain__, uncore__, tmp__) \
      for_each_fw_domain_masked(domain__, (uncore__)->fw_domains, uncore__, tmp__)
-static inline struct intel_uncore *
-forcewake_domain_to_uncore(const struct intel_uncore_forcewake_domain *d)
-{
-    return container_of(d, struct intel_uncore, fw_domain[d->id]);
-}
-
  static inline bool
  intel_uncore_has_forcewake(const struct intel_uncore *uncore)
  {


Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux