Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Some basic information that is useful to know, such as how many cycles > is a MI_NOOP. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Anna Karas <anna.karas@xxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > .../i915/gem/selftests/i915_gem_object_blt.c | 15 +- > drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 339 +++++++++++++++++- > drivers/gpu/drm/i915/i915_pci.c | 6 + > drivers/gpu/drm/i915/i915_selftest.h | 4 + > .../drm/i915/selftests/i915_perf_selftests.h | 13 + > .../gpu/drm/i915/selftests/i915_selftest.c | 43 +++ > 6 files changed, 417 insertions(+), 3 deletions(-) > create mode 100644 drivers/gpu/drm/i915/selftests/i915_perf_selftests.h > > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c > index 146ba71ce797..17fc60aa123a 100644 > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c > @@ -527,8 +527,6 @@ static int igt_copy_blt_ctx0(void *arg) > int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) > { > static const struct i915_subtest tests[] = { > - SUBTEST(perf_fill_blt), > - SUBTEST(perf_copy_blt), > SUBTEST(igt_fill_blt), > SUBTEST(igt_fill_blt_ctx0), > SUBTEST(igt_copy_blt), > @@ -543,3 +541,16 @@ int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) > > return i915_live_subtests(tests, i915); > } > + > +int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915) > +{ > + static const struct i915_subtest tests[] = { > + SUBTEST(perf_fill_blt), > + SUBTEST(perf_copy_blt), > + }; > + > + if (intel_gt_is_wedged(&i915->gt)) > + return 0; > + > + return i915_live_subtests(tests, i915); > +} > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c > index 3880f07c29b8..aa6b1997eddb 100644 > --- 
a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c > @@ -4,7 +4,344 @@ > * Copyright © 2018 Intel Corporation > */ > > -#include "../i915_selftest.h" > +#include <linux/sort.h> > + > +#include "intel_gt_pm.h" > +#include "intel_rps.h" > + > +#include "i915_selftest.h" > +#include "selftests/igt_flush_test.h" > + > +#define COUNT 5 > + > +static int cmp_u32(const void *A, const void *B) > +{ > + const u32 *a = A, *b = B; > + > + return *a - *b; > +} > + > +static void perf_begin(struct intel_gt *gt) > +{ > + intel_gt_pm_get(gt); > + > + /* Boost gpufreq to max [waitboost] and keep it fixed */ > + atomic_inc(&gt->rps.num_waiters); > + schedule_work(&gt->rps.work); > + flush_work(&gt->rps.work); > +} > + > +static int perf_end(struct intel_gt *gt) > +{ > + atomic_dec(&gt->rps.num_waiters); > + intel_gt_pm_put(gt); > + > + return igt_flush_test(gt->i915); > +} > + > +static int write_timestamp(struct i915_request *rq, int slot) > +{ > + u32 cmd; > + u32 *cs; > + > + cs = intel_ring_begin(rq, 4); > + if (IS_ERR(cs)) > + return PTR_ERR(cs); > + > + cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; > + if (INTEL_GEN(rq->i915) >= 8) > + cmd++; > + *cs++ = cmd; > + *cs++ = rq->engine->mmio_base + 0x358; We have RING_TIMESTAMP. The test is so short that we don't need the UDW, I suppose? -Mika, just strolling around here. 
> + *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32); > + *cs++ = 0; > + > + intel_ring_advance(rq, cs); > + > + return 0; > +} > + > +static struct i915_vma *create_empty_batch(struct intel_context *ce) > +{ > + struct drm_i915_gem_object *obj; > + struct i915_vma *vma; > + u32 *cs; > + int err; > + > + obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE); > + if (IS_ERR(obj)) > + return ERR_CAST(obj); > + > + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); > + if (IS_ERR(cs)) { > + i915_gem_object_put(obj); > + return ERR_CAST(cs); > + } > + > + cs[0] = MI_BATCH_BUFFER_END; > + > + i915_gem_object_flush_map(obj); > + i915_gem_object_unpin_map(obj); > + > + vma = i915_vma_instance(obj, ce->vm, NULL); > + if (IS_ERR(vma)) { > + i915_gem_object_put(obj); > + return vma; > + } > + > + err = i915_vma_pin(vma, 0, 0, PIN_USER); > + if (err) { > + i915_gem_object_put(obj); > + return ERR_PTR(err); > + } > + > + return vma; > +} > + > +static u32 trifilter(u32 *a) > +{ > + u64 sum; > + > + sort(a, COUNT, sizeof(*a), cmp_u32, NULL); > + > + sum += mul_u32_u32(a[2], 2); > + sum += a[1]; > + sum += a[3]; > + > + return sum >> 2; > +} > + > +static int perf_mi_bb_start(void *arg) > +{ > + struct intel_gt *gt = arg; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + int err = 0; > + > + if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ > + return 0; > + > + perf_begin(gt); > + for_each_engine(engine, gt, id) { > + struct intel_context *ce = engine->kernel_context; > + struct i915_vma *batch; > + u32 cycles[COUNT]; > + int i; > + > + batch = create_empty_batch(ce); > + if (IS_ERR(batch)) { > + err = PTR_ERR(batch); > + break; > + } > + > + err = i915_vma_sync(batch); > + if (err) { > + i915_vma_put(batch); > + break; > + } > + > + for (i = 0; i < ARRAY_SIZE(cycles); i++) { > + struct i915_request *rq; > + > + rq = i915_request_create(ce); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + break; > + } > + > + err = 
write_timestamp(rq, 2); > + if (err) > + goto out; > + > + err = rq->engine->emit_bb_start(rq, > + batch->node.start, 8, > + 0); > + if (err) > + goto out; > + > + err = write_timestamp(rq, 3); > + if (err) > + goto out; > + > +out: > + i915_request_get(rq); > + i915_request_add(rq); > + > + if (i915_request_wait(rq, 0, HZ / 5) < 0) > + err = -EIO; > + i915_request_put(rq); > + if (err) > + break; > + > + cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2]; > + } > + i915_vma_put(batch); > + if (err) > + break; > + > + pr_info("%s: MI_BB_START cycles: %u\n", > + engine->name, trifilter(cycles)); > + } > + if (perf_end(gt)) > + err = -EIO; > + > + return err; > +} > + > +static struct i915_vma *create_nop_batch(struct intel_context *ce) > +{ > + struct drm_i915_gem_object *obj; > + struct i915_vma *vma; > + u32 *cs; > + int err; > + > + obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K); > + if (IS_ERR(obj)) > + return ERR_CAST(obj); > + > + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); > + if (IS_ERR(cs)) { > + i915_gem_object_put(obj); > + return ERR_CAST(cs); > + } > + > + memset(cs, 0, SZ_64K); > + cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END; > + > + i915_gem_object_flush_map(obj); > + i915_gem_object_unpin_map(obj); > + > + vma = i915_vma_instance(obj, ce->vm, NULL); > + if (IS_ERR(vma)) { > + i915_gem_object_put(obj); > + return vma; > + } > + > + err = i915_vma_pin(vma, 0, 0, PIN_USER); > + if (err) { > + i915_gem_object_put(obj); > + return ERR_PTR(err); > + } > + > + return vma; > +} > + > +static int perf_mi_noop(void *arg) > +{ > + struct intel_gt *gt = arg; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + int err = 0; > + > + if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ > + return 0; > + > + perf_begin(gt); > + for_each_engine(engine, gt, id) { > + struct intel_context *ce = engine->kernel_context; > + struct i915_vma *base, *nop; > + u32 cycles[COUNT]; > + int i; > + > + base = 
create_empty_batch(ce); > + if (IS_ERR(base)) { > + err = PTR_ERR(base); > + break; > + } > + > + err = i915_vma_sync(base); > + if (err) { > + i915_vma_put(base); > + break; > + } > + > + nop = create_nop_batch(ce); > + if (IS_ERR(nop)) { > + err = PTR_ERR(nop); > + i915_vma_put(base); > + break; > + } > + > + err = i915_vma_sync(nop); > + if (err) { > + i915_vma_put(nop); > + i915_vma_put(base); > + break; > + } > + > + for (i = 0; i < ARRAY_SIZE(cycles); i++) { > + struct i915_request *rq; > + > + rq = i915_request_create(ce); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + break; > + } > + > + err = write_timestamp(rq, 2); > + if (err) > + goto out; > + > + err = rq->engine->emit_bb_start(rq, > + base->node.start, 8, > + 0); > + if (err) > + goto out; > + > + err = write_timestamp(rq, 3); > + if (err) > + goto out; > + > + err = rq->engine->emit_bb_start(rq, > + nop->node.start, 4096, > + 0); > + if (err) > + goto out; > + > + err = write_timestamp(rq, 4); > + if (err) > + goto out; > + > +out: > + i915_request_get(rq); > + i915_request_add(rq); > + > + if (i915_request_wait(rq, 0, HZ / 5) < 0) > + err = -EIO; > + i915_request_put(rq); > + if (err) > + break; > + > + cycles[i] = > + (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) - > + (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]); > + } > + i915_vma_put(nop); > + i915_vma_put(base); > + if (err) > + break; > + > + pr_info("%s: 16K MI_NOOP cycles: %u\n", > + engine->name, trifilter(cycles)); > + } > + if (perf_end(gt)) > + err = -EIO; > + > + return err; > +} > + > +int intel_engine_cs_perf_selftests(struct drm_i915_private *i915) > +{ > + static const struct i915_subtest tests[] = { > + SUBTEST(perf_mi_bb_start), > + SUBTEST(perf_mi_noop), > + }; > + > + if (intel_gt_is_wedged(&i915->gt)) > + return 0; > + > + return intel_gt_live_subtests(tests, &i915->gt); > +} > > static int intel_mmio_bases_check(void *arg) > { > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index 
1bb701d32a5d..da3e9b5752ac 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -1003,6 +1003,12 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) > return err > 0 ? -ENOTTY : err; > } > > + err = i915_perf_selftests(pdev); > + if (err) { > + i915_pci_remove(pdev); > + return err > 0 ? -ENOTTY : err; > + } > + > return 0; > } > > diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h > index 4d88205de51b..98bcb6fa0ab4 100644 > --- a/drivers/gpu/drm/i915/i915_selftest.h > +++ b/drivers/gpu/drm/i915/i915_selftest.h > @@ -36,6 +36,7 @@ struct i915_selftest { > char *filter; > int mock; > int live; > + int perf; > }; > > #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) > @@ -45,6 +46,7 @@ extern struct i915_selftest i915_selftest; > > int i915_mock_selftests(void); > int i915_live_selftests(struct pci_dev *pdev); > +int i915_perf_selftests(struct pci_dev *pdev); > > /* We extract the function declarations from i915_mock_selftests.h and > * i915_live_selftests.h Add your unit test declarations there! 
> @@ -61,6 +63,7 @@ int i915_live_selftests(struct pci_dev *pdev); > #undef selftest > #define selftest(name, func) int func(struct drm_i915_private *i915); > #include "selftests/i915_live_selftests.h" > +#include "selftests/i915_perf_selftests.h" > #undef selftest > > struct i915_subtest { > @@ -109,6 +112,7 @@ int __i915_subtests(const char *caller, > > static inline int i915_mock_selftests(void) { return 0; } > static inline int i915_live_selftests(struct pci_dev *pdev) { return 0; } > +static inline int i915_perf_selftests(struct pci_dev *pdev) { return 0; } > > #define I915_SELFTEST_DECLARE(x) > #define I915_SELFTEST_ONLY(x) 0 > diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h > new file mode 100644 > index 000000000000..f7129a243daa > --- /dev/null > +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h > @@ -0,0 +1,13 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* List each unit test as selftest(name, function) > + * > + * The name is used as both an enum and expanded as subtest__name to create > + * a module parameter. It must be unique and legal for a C identifier. > + * > + * The function should be of type int function(void). It may be conditionally > + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). 
> + * > + * Tests are executed in order by igt/i915_selftest > + */ > +selftest(engine_cs, intel_engine_cs_perf_selftests) > +selftest(blt, i915_gem_object_blt_perf_selftests) > diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c > index a6cca4ad96f6..d3bf9eefb682 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c > +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c > @@ -57,6 +57,12 @@ enum { > #undef selftest > }; > > +enum { > +#define selftest(name, func) perf_##name, > +#include "i915_perf_selftests.h" > +#undef selftest > +}; > + > struct selftest { > bool enabled; > const char *name; > @@ -78,6 +84,12 @@ static struct selftest live_selftests[] = { > }; > #undef selftest > > +#define selftest(n, f) [perf_##n] = { .name = #n, { .live = f } }, > +static struct selftest perf_selftests[] = { > +#include "i915_perf_selftests.h" > +}; > +#undef selftest > + > /* Embed the line number into the parameter name so that we can order tests */ > #define selftest(n, func) selftest_0(n, func, param(n)) > #define param(n) __PASTE(igt__, __PASTE(__LINE__, __mock_##n)) > @@ -93,6 +105,13 @@ module_param_named(id, live_selftests[live_##n].enabled, bool, 0400); > #include "i915_live_selftests.h" > #undef selftest_0 > #undef param > + > +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __perf_##n)) > +#define selftest_0(n, func, id) \ > +module_param_named(id, perf_selftests[perf_##n].enabled, bool, 0400); > +#include "i915_perf_selftests.h" > +#undef selftest_0 > +#undef param > #undef selftest > > static void set_default_test_all(struct selftest *st, unsigned int count) > @@ -200,6 +219,27 @@ int i915_live_selftests(struct pci_dev *pdev) > return 0; > } > > +int i915_perf_selftests(struct pci_dev *pdev) > +{ > + int err; > + > + if (!i915_selftest.perf) > + return 0; > + > + err = run_selftests(perf, pdev_to_i915(pdev)); > + if (err) { > + i915_selftest.perf = err; > + return err; > + } > + > + if 
(i915_selftest.perf < 0) { > + i915_selftest.perf = -ENOTTY; > + return 1; > + } > + > + return 0; > +} > + > static bool apply_subtest_filter(const char *caller, const char *name) > { > char *filter, *sep, *tok; > @@ -365,3 +405,6 @@ MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardw > > module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); > MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); > + > +module_param_named_unsafe(perf_selftests, i915_selftest.perf, int, 0400); > +MODULE_PARM_DESC(perf_selftests, "Run performance orientated selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); > -- > 2.24.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx