Quoting Tvrtko Ursulin (2019-05-22 16:57:12) > -static void > +static unsigned int > terminate_bb(struct w_step *w, unsigned int flags) > { > const uint32_t bbe = 0xa << 23; > unsigned long mmap_start, mmap_len; > unsigned long batch_start = w->bb_sz; > + unsigned int r = 0; > uint32_t *ptr, *cs; > > igt_assert(((flags & RT) && (flags & SEQNO)) || !(flags & RT)); > @@ -838,6 +854,9 @@ terminate_bb(struct w_step *w, unsigned int flags) > if (flags & RT) > batch_start -= 12 * sizeof(uint32_t); > > + if (w->unbound_duration) > + batch_start -= 4 * sizeof(uint32_t); /* MI_ARB_CHK + MI_BATCH_BUFFER_START */ > + > mmap_start = rounddown(batch_start, PAGE_SIZE); > mmap_len = ALIGN(w->bb_sz - mmap_start, PAGE_SIZE); > > @@ -847,8 +866,19 @@ terminate_bb(struct w_step *w, unsigned int flags) > ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE); > cs = (uint32_t *)((char *)ptr + batch_start - mmap_start); > > + if (w->unbound_duration) { > + w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t); > + batch_start += 4 * sizeof(uint32_t); > + > + *cs++ = w->preempt_us ? 0x5 << 23 /* MI_ARB_CHK; */ : MI_NOOP; > + w->recursive_bb_start = cs; > + *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; > + *cs++ = 0; > + *cs++ = 0; delta is zero, and mmap_len is consistent, so yup this gives a page of nops before looping. 
> + } > + > if (flags & SEQNO) { > - w->reloc[0].offset = batch_start + sizeof(uint32_t); > + w->reloc[r++].offset = batch_start + sizeof(uint32_t); > batch_start += 4 * sizeof(uint32_t); > > *cs++ = MI_STORE_DWORD_IMM; > @@ -860,7 +890,7 @@ terminate_bb(struct w_step *w, unsigned int flags) > } > > if (flags & RT) { > - w->reloc[1].offset = batch_start + sizeof(uint32_t); > + w->reloc[r++].offset = batch_start + sizeof(uint32_t); > batch_start += 4 * sizeof(uint32_t); > > *cs++ = MI_STORE_DWORD_IMM; > @@ -870,7 +900,7 @@ terminate_bb(struct w_step *w, unsigned int flags) > w->rt0_value = cs; > *cs++ = 0; > > - w->reloc[2].offset = batch_start + 2 * sizeof(uint32_t); > + w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t); > batch_start += 4 * sizeof(uint32_t); > > *cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */ > @@ -879,7 +909,7 @@ terminate_bb(struct w_step *w, unsigned int flags) > *cs++ = 0; > *cs++ = 0; > > - w->reloc[3].offset = batch_start + sizeof(uint32_t); > + w->reloc[r++].offset = batch_start + sizeof(uint32_t); > batch_start += 4 * sizeof(uint32_t); > > *cs++ = MI_STORE_DWORD_IMM; > @@ -891,6 +921,8 @@ terminate_bb(struct w_step *w, unsigned int flags) > } > > *cs = bbe; > + > + return r; > } > > static const unsigned int eb_engine_map[NUM_ENGINES] = { > @@ -1011,19 +1043,22 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags) > } > } > > - w->bb_sz = get_bb_sz(w->duration.max); > - w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz); > + if (w->unbound_duration) > + /* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */ > + w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us)) + > + (1 + 3) * sizeof(uint32_t); > + else > + w->bb_sz = get_bb_sz(w->duration.max); > + w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
> init_bb(w, flags); > - terminate_bb(w, flags); > + w->obj[j].relocation_count = terminate_bb(w, flags); > > - if (flags & SEQNO) { > + if (w->obj[j].relocation_count) { > w->obj[j].relocs_ptr = to_user_pointer(&w->reloc); > - if (flags & RT) > - w->obj[j].relocation_count = 4; > - else > - w->obj[j].relocation_count = 1; > for (i = 0; i < w->obj[j].relocation_count; i++) > w->reloc[i].target_handle = 1; > + if (w->unbound_duration) > + w->reloc[0].target_handle = j; > } That flows much better. Reviewed-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx