On Tue, Apr 25, 2017 at 01:25:48PM +0100, Chris Wilson wrote: > On Tue, Apr 25, 2017 at 01:10:34PM +0100, Tvrtko Ursulin wrote: > > > > On 25/04/2017 12:35, Chris Wilson wrote: > > >On Tue, Apr 25, 2017 at 12:13:04PM +0100, Tvrtko Ursulin wrote: > > >>From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > [snip] > > >>+static enum intel_engine_id > > >>+rt_balance(const struct workload_balancer *balancer, > > >>+ struct workload *wrk, struct w_step *w) > > >>+{ > > >>+ enum intel_engine_id engine; > > >>+ long qd[NUM_ENGINES]; > > >>+ unsigned int n; > > >>+ > > >>+ igt_assert(w->engine == VCS); > > >>+ > > >>+ /* Estimate the "speed" of the most recent batch > > >>+ * (finish time - submit time) > > >>+ * and use that as an approximate for the total remaining time for > > >>+ * all batches on that engine. We try to keep the total remaining > > >>+ * balanced between the engines. > > >>+ */ > > > > > >Next steps for this would be to move from an instantaneous speed, to an > > >average. I'm thinking something like an exponential-decay moving average > > >just to make the estimation more robust. > > > > Do you think it would be OK to merge these two tools at this point > > and continue improving them in place? > > Yes. Although there's no excuse not to make this NO_RELOC from the start, > especially if we want to demonstrate how it should be done! Hopefully > attached the delta. Which I forgot. Let's try again... -Chris -- Chris Wilson, Intel Open Source Technology Centre
>From 985f873f1c9cdaec396c5410738910da04e8f95b Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Date: Tue, 25 Apr 2017 13:22:39 +0100 Subject: [PATCH] no-reloc --- benchmarks/gem_wsim.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c index 1f491a5..f13477a 100644 --- a/benchmarks/gem_wsim.c +++ b/benchmarks/gem_wsim.c @@ -90,9 +90,13 @@ struct w_step struct drm_i915_gem_relocation_entry reloc[3]; unsigned long bb_sz; uint32_t bb_handle; - uint32_t *mapped_batch, *mapped_seqno; - unsigned int mapped_len; + uint32_t *mapped_batch; + uint32_t *seqno_value; + uint32_t *seqno_address; uint32_t *rt0_value; + uint32_t *rt0_address; + uint32_t *rt1_address; + unsigned int mapped_len; }; struct workload @@ -463,9 +467,10 @@ terminate_bb(struct w_step *w, unsigned int flags) batch_start += 4 * sizeof(uint32_t); *cs++ = MI_STORE_DWORD_IMM; + w->seqno_address = cs; *cs++ = 0; *cs++ = 0; - w->mapped_seqno = cs; + w->seqno_value = cs; *cs++ = 0; } @@ -474,6 +479,7 @@ terminate_bb(struct w_step *w, unsigned int flags) batch_start += 4 * sizeof(uint32_t); *cs++ = MI_STORE_DWORD_IMM; + w->rt0_address = cs; *cs++ = 0; *cs++ = 0; w->rt0_value = cs; @@ -484,6 +490,7 @@ terminate_bb(struct w_step *w, unsigned int flags) *cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */ *cs++ = RCS_TIMESTAMP; + w->rt1_address = cs; *cs++ = 0; *cs++ = 0; } @@ -500,8 +507,7 @@ eb_update_flags(struct w_step *w, enum intel_engine_id engine, { w->eb.flags = eb_engine_map[engine]; w->eb.flags |= I915_EXEC_HANDLE_LUT; - if (!(flags & SEQNO)) - w->eb.flags |= I915_EXEC_NO_RELOC; + w->eb.flags |= I915_EXEC_NO_RELOC; } static void @@ -543,10 +549,8 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags) w->obj[bb_i].relocation_count = 3; else w->obj[bb_i].relocation_count = 1; - for (int i = 0; i < w->obj[bb_i].relocation_count; i++) { - 
w->reloc[i].presumed_offset = -1; + for (int i = 0; i < w->obj[bb_i].relocation_count; i++) w->reloc[i].target_handle = 1; - } } w->eb.buffers_ptr = to_user_pointer(w->obj); @@ -782,10 +786,14 @@ update_bb_seqno(struct w_step *w, enum intel_engine_id engine, uint32_t seqno) gem_set_domain(fd, w->bb_handle, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); - *w->mapped_seqno = seqno; - - w->reloc[0].presumed_offset = -1; w->reloc[0].delta = VCS_SEQNO_OFFSET(engine); + + *w->seqno_value = seqno; + *w->seqno_address = w->reloc[0].presumed_offset + w->reloc[0].delta; + + /* If not using NO_RELOC, force the relocations */ + if ((w->eb.flags & I915_EXEC_NO_RELOC)) + w->reloc[0].presumed_offset = -1; } static void @@ -796,13 +804,18 @@ update_bb_rt(struct w_step *w, enum intel_engine_id engine) gem_set_domain(fd, w->bb_handle, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); - *w->rt0_value = *REG(RCS_TIMESTAMP); - - w->reloc[1].presumed_offset = -1; w->reloc[1].delta = VCS_SEQNO_OFFSET(engine) + sizeof(uint32_t); - - w->reloc[2].presumed_offset = -1; w->reloc[2].delta = VCS_SEQNO_OFFSET(engine) + 2 * sizeof(uint32_t); + + *w->rt0_value = *REG(RCS_TIMESTAMP); + *w->rt0_address = w->reloc[1].presumed_offset + w->reloc[1].delta; + *w->rt1_address = w->reloc[1].presumed_offset + w->reloc[1].delta; + + /* If not using NO_RELOC, force the relocations */ + if ((w->eb.flags & I915_EXEC_NO_RELOC)) { + w->reloc[1].presumed_offset = -1; + w->reloc[2].presumed_offset = -1; + } } static void w_sync_to(struct workload *wrk, struct w_step *w, int target) -- 1.9.1
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx