Re: [PATCH v5 1/4] drm/i915/perf: break OA config buffer object in 2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Yeah it's only filtering.

We need to pin the context to a particular HW tag so that filtering can work properly.

-Lionel

On 14/04/2020 02:24, Umesh Nerlige Ramappa wrote:
Hi Lionel,

What's the implication of using separate contexts for 3d and compute on perf OA? Is it only context-filtering? If so, have you considered disabling context filtering with a parameter instead of actually filtering for specific contexts? Is this a privileged use case?

Thanks,
Umesh

On Thu, Apr 09, 2020 at 12:17:03PM +0300, Lionel Landwerlin wrote:
We want to enable performance monitoring on multiple contexts to cover
the Iris use case of using 2 GEM contexts (3D & compute).

So start by breaking the OA configuration BO, which contains both global &
per-context register writes, into two parts.

NOA muxes & OA configurations are global, while FLEXEU register
configurations are per context.

v2: Use an offset into the same VMA (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_perf.c | 176 ++++++++++++++++++++-----------
1 file changed, 116 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5cde3e4e7be6..d2183fd701a3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -372,6 +372,7 @@ struct i915_oa_config_bo {

    struct i915_oa_config *oa_config;
    struct i915_vma *vma;
+    u32 per_context_offset;
};

static struct ctl_table_header *sysctl_header;
@@ -1826,37 +1827,43 @@ static struct i915_oa_config_bo *
alloc_oa_config_buffer(struct i915_perf_stream *stream,
               struct i915_oa_config *oa_config)
{
-    struct drm_i915_gem_object *obj;
    struct i915_oa_config_bo *oa_bo;
+    struct drm_i915_gem_object *obj;
    size_t config_length = 0;
-    u32 *cs;
+    u32 *cs_start, *cs;
    int err;

    oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
    if (!oa_bo)
        return ERR_PTR(-ENOMEM);

+    /*
+     * Global configuration requires a jump into the NOA wait BO for it to
+     * apply.
+     */
    config_length += num_lri_dwords(oa_config->mux_regs_len);
    config_length += num_lri_dwords(oa_config->b_counter_regs_len);
-    config_length += num_lri_dwords(oa_config->flex_regs_len);
    config_length += 3; /* MI_BATCH_BUFFER_START */
+
+    config_length += num_lri_dwords(oa_config->flex_regs_len);
+    config_length += 1 /* MI_BATCH_BUFFER_END */;
+
    config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);

-    obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+    obj = i915_gem_object_create_shmem(stream->perf->i915,
+                       config_length);
    if (IS_ERR(obj)) {
        err = PTR_ERR(obj);
        goto err_free;
    }

-    cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
-    if (IS_ERR(cs)) {
-        err = PTR_ERR(cs);
-        goto err_oa_bo;
+    cs_start = i915_gem_object_pin_map(obj, I915_MAP_WB);
+    if (IS_ERR(cs_start)) {
+        err = PTR_ERR(cs_start);
+        goto err_bo;
    }

-    cs = write_cs_mi_lri(cs,
-                 oa_config->mux_regs,
-                 oa_config->mux_regs_len);
+    cs = cs_start;
    cs = write_cs_mi_lri(cs,
                 oa_config->b_counter_regs,
                 oa_config->b_counter_regs_len);
@@ -1871,6 +1878,14 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
    *cs++ = i915_ggtt_offset(stream->noa_wait);
    *cs++ = 0;

+    oa_bo->per_context_offset = 4 * (cs - cs_start);
+
+    cs = write_cs_mi_lri(cs,
+                 oa_config->mux_regs,
+                 oa_config->mux_regs_len);
+
+    *cs++ = MI_BATCH_BUFFER_END;
+
    i915_gem_object_flush_map(obj);
    i915_gem_object_unpin_map(obj);

@@ -1879,7 +1894,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
                       NULL);
    if (IS_ERR(oa_bo->vma)) {
        err = PTR_ERR(oa_bo->vma);
-        goto err_oa_bo;
+        goto err_bo;
    }

    oa_bo->oa_config = i915_oa_config_get(oa_config);
@@ -1887,15 +1902,15 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,

    return oa_bo;

-err_oa_bo:
+err_bo:
    i915_gem_object_put(obj);
err_free:
    kfree(oa_bo);
    return ERR_PTR(err);
}

-static struct i915_vma *
-get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
+static struct i915_oa_config_bo *
+get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
{
    struct i915_oa_config_bo *oa_bo;

@@ -1908,34 +1923,31 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
            memcmp(oa_bo->oa_config->uuid,
               oa_config->uuid,
               sizeof(oa_config->uuid)) == 0)
-            goto out;
+            return oa_bo;
    }

-    oa_bo = alloc_oa_config_buffer(stream, oa_config);
-    if (IS_ERR(oa_bo))
-        return ERR_CAST(oa_bo);
-
-out:
-    return i915_vma_get(oa_bo->vma);
+    return alloc_oa_config_buffer(stream, oa_config);
}

static int
emit_oa_config(struct i915_perf_stream *stream,
           struct i915_oa_config *oa_config,
           struct intel_context *ce,
-           struct i915_active *active)
+           struct i915_active *active,
+           bool global)
{
+    struct i915_oa_config_bo *oa_bo;
    struct i915_request *rq;
-    struct i915_vma *vma;
+    u64 vma_offset;
    int err;

-    vma = get_oa_vma(stream, oa_config);
-    if (IS_ERR(vma))
-        return PTR_ERR(vma);
+    oa_bo = get_oa_bo(stream, oa_config);
+    if (IS_ERR(oa_bo))
+        return PTR_ERR(oa_bo);

-    err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+    err = i915_vma_pin(oa_bo->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
    if (err)
-        goto err_vma_put;
+        return err;

    intel_engine_pm_get(ce->engine);
    rq = i915_request_create(ce);
@@ -1957,16 +1969,19 @@ emit_oa_config(struct i915_perf_stream *stream,
            goto err_add_request;
    }

-    i915_vma_lock(vma);
-    err = i915_request_await_object(rq, vma->obj, 0);
+    i915_vma_lock(oa_bo->vma);
+    err = i915_request_await_object(rq, oa_bo->vma->obj, 0);
    if (!err)
-        err = i915_vma_move_to_active(vma, rq, 0);
-    i915_vma_unlock(vma);
+        err = i915_vma_move_to_active(oa_bo->vma, rq, 0);
+    i915_vma_unlock(oa_bo->vma);
    if (err)
        goto err_add_request;

-    err = rq->engine->emit_bb_start(rq,
-                    vma->node.start, 0,
+    vma_offset = oa_bo->vma->node.start;
+    if (!global)
+        vma_offset += oa_bo->per_context_offset;
+
+    err = rq->engine->emit_bb_start(rq, vma_offset, 0,
                    I915_DISPATCH_SECURE);
    if (err)
        goto err_add_request;
@@ -1974,9 +1989,7 @@ emit_oa_config(struct i915_perf_stream *stream,
err_add_request:
    i915_request_add(rq);
err_vma_unpin:
-    i915_vma_unpin(vma);
-err_vma_put:
-    i915_vma_put(vma);
+    i915_vma_unpin(oa_bo->vma);
    return err;
}

@@ -1990,6 +2003,7 @@ hsw_enable_metric_set(struct i915_perf_stream *stream,
              struct i915_active *active)
{
    struct intel_uncore *uncore = stream->uncore;
+    int err;

    /*
     * PRM:
@@ -2006,9 +2020,17 @@ hsw_enable_metric_set(struct i915_perf_stream *stream,
    intel_uncore_rmw(uncore, GEN6_UCGCTL1,
             0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);

-    return emit_oa_config(stream,
-                  stream->oa_config, oa_context(stream),
-                  active);
+    err = emit_oa_config(stream, stream->oa_config,
+                 oa_context(stream),
+                 active,
+                 false /* global */);
+    if (err)
+        return err;
+
+    return emit_oa_config(stream, stream->oa_config,
+                  oa_context(stream),
+                  active,
+                  true /* global */);
}

static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2419,7 +2441,7 @@ gen8_enable_metric_set(struct i915_perf_stream *stream,
{
    struct intel_uncore *uncore = stream->uncore;
    struct i915_oa_config *oa_config = stream->oa_config;
-    int ret;
+    int err;

    /*
     * We disable slice/unslice clock ratio change reports on SKL since
@@ -2455,13 +2477,21 @@ gen8_enable_metric_set(struct i915_perf_stream *stream,
     * to make sure all slices/subslices are ON before writing to NOA
     * registers.
     */
-    ret = lrc_configure_all_contexts(stream, oa_config, active);
-    if (ret)
-        return ret;
+    err = lrc_configure_all_contexts(stream, oa_config, active);
+    if (err)
+        return err;

-    return emit_oa_config(stream,
-                  stream->oa_config, oa_context(stream),
-                  active);
+    err = emit_oa_config(stream, oa_config,
+                 oa_context(stream),
+                 active,
+                 false /* global */);
+    if (err)
+        return err;
+
+    return emit_oa_config(stream, stream->oa_config,
+                  oa_context(stream),
+                  active,
+                  true /* global */);
}

static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) @@ -2507,9 +2537,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
        return ret;

    /*
-     * For Gen12, performance counters are context
-     * saved/restored. Only enable it for the context that
-     * requested this.
+     * For Gen12, performance counters are also context saved/restored on
+     * another set of performance registers. Configure the unit dealing
+     * with those.
     */
    if (stream->ctx) {
        ret = gen12_configure_oar_context(stream, active);
@@ -2517,9 +2547,17 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
            return ret;
    }

-    return emit_oa_config(stream,
-                  stream->oa_config, oa_context(stream),
-                  active);
+    ret = emit_oa_config(stream, oa_config,
+                 oa_context(stream),
+                 active,
+                 false /* global */);
+    if (ret)
+        return ret;
+
+    return emit_oa_config(stream, stream->oa_config,
+                  oa_context(stream),
+                  active,
+                  true /* global */);
}

static void gen8_disable_metric_set(struct i915_perf_stream *stream)
@@ -3174,6 +3212,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
                    unsigned long metrics_set)
{
    struct i915_oa_config *config;
+    struct i915_active *active = NULL;
    long ret = stream->oa_config->id;

    config = i915_perf_get_oa_config(stream->perf, metrics_set);
@@ -3181,7 +3220,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
        return -EINVAL;

    if (config != stream->oa_config) {
-        int err;
+        active = i915_active_create();
+        if (!active) {
+            ret = -ENOMEM;
+            goto err_config;
+        }

        /*
         * If OA is bound to a specific context, emit the
@@ -3192,13 +3235,26 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
         * When set globally, we use a low priority kernel context,
         * so it will effectively take effect when idle.
         */
-        err = emit_oa_config(stream, config, oa_context(stream), NULL);
-        if (!err)
-            config = xchg(&stream->oa_config, config);
-        else
-            ret = err;
+        ret = emit_oa_config(stream, config,
+                     oa_context(stream),
+                     active,
+                     false /* global */);
+        if (ret)
+            goto err_active;
+
+        ret = emit_oa_config(stream, config,
+                     oa_context(stream),
+                     active,
+                     true /* global */);
+        if (ret)
+            goto err_active;
+
+        config = xchg(&stream->oa_config, config);
    }

+err_active:
+    i915_active_put(active);
+err_config:
    i915_oa_config_put(config);

    return ret;
--
2.26.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux