Re: [CI 1/5] igt/gem_workarounds: Read the workaround registers from the active context

Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> · Tue, 03 Oct 2017 18:19:10 +0300

Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> The workarounds are only valid whilst the GPU is active. To be sure we
> are reading the registers in the right state, issue the reads from the GPU.
>

Yay, this is the right way :)

Some comments and findings below...

> v2: Show ignored write-only failures as debug.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  tests/gem_workarounds.c | 147 ++++++++++++++++++++++++++----------------------
>  1 file changed, 81 insertions(+), 66 deletions(-)
>
> diff --git a/tests/gem_workarounds.c b/tests/gem_workarounds.c
> index 5e30a7b8..95ec250a 100644
> --- a/tests/gem_workarounds.c
> +++ b/tests/gem_workarounds.c
> @@ -61,20 +61,6 @@ static struct write_only_list {
>  static struct intel_wa_reg *wa_regs;
>  static int num_wa_regs;
>  
> -static void wait_gpu(void)
> -{
> -	int fd = drm_open_driver(DRIVER_INTEL);
> -	gem_quiescent_gpu(fd);
> -	close(fd);
> -}
> -
> -static void test_hang_gpu(void)
> -{
> -	int fd = drm_open_driver(DRIVER_INTEL);
> -	igt_post_hang_ring(fd, igt_hang_ring(fd, I915_EXEC_DEFAULT));
> -	close(fd);
> -}
> -
>  static void test_suspend_resume(void)
>  {
>  	igt_info("Suspending the device ...\n");
> @@ -96,49 +82,95 @@ static bool write_only(const uint32_t addr)
>  	return false;
>  }
>  
> -static int workaround_fail_count(void)
> -{
> -	int i, fail_count = 0;
> -
> -	/* There is a small delay after coming ot of rc6 to the correct
> -	   render context values will get loaded by hardware (bdw,chv).
> -	   This here ensures that we have the correct context loaded before
> -	   we start to read values */
> -	wait_gpu();
> +#define MI_STORE_REGISTER_MEM (0x24 << 23)
>  
> -	igt_debug("Address\tval\t\tmask\t\tread\t\tresult\n");
> +static int workaround_fail_count(int fd)
> +{
> +	struct drm_i915_gem_exec_object2 obj[2];
> +	struct drm_i915_gem_relocation_entry *reloc;
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	uint32_t result_sz, batch_sz;
> +	uint32_t *base, *out;
> +	int fail_count = 0;
> +
> +	reloc = calloc(num_wa_regs, sizeof(*reloc));
> +	igt_assert(reloc);
> +
> +	result_sz = 4 * num_wa_regs;
> +	result_sz = (result_sz + 4095) & -4096;

Macro for align? Further, why do we even need it? From
what I can gather, the mapping should work for smaller
objects also.

> +
> +	batch_sz = 16 * num_wa_regs;
> +	batch_sz = (batch_sz + 4 + 4095) & -4096;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = gem_create(fd, result_sz);
> +	gem_set_caching(fd, obj[0].handle, 1);

s/1/I915_CACHING_CACHED

> +	obj[1].handle = gem_create(fd, batch_sz);
> +	obj[1].relocs_ptr = to_user_pointer(reloc);
> +	obj[1].relocation_count = num_wa_regs;
> +
> +	out = base = gem_mmap__cpu(fd, obj[1].handle, 0, batch_sz, PROT_WRITE);
> +	for (int i = 0; i < num_wa_regs; i++) {
> +		*out++ = MI_STORE_REGISTER_MEM | ((gen >= 8 ? 4 : 2) - 2);
> +		*out++ = wa_regs[i].addr;
> +		reloc[i].target_handle = obj[0].handle;
> +		reloc[i].offset = (out - base) * sizeof(*out);
> +		reloc[i].delta = i * sizeof(uint32_t);
> +		reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +		reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
> +		*out++ = reloc[i].delta;
> +		if (gen >= 8)
> +			*out++ = 0;
> +	}
> +	*out++ = MI_BATCH_BUFFER_END;
> +	munmap(base, batch_sz);
>  
> -	for (i = 0; i < num_wa_regs; ++i) {
> -		const uint32_t val = intel_register_read(wa_regs[i].addr);
> -		const bool ok = (wa_regs[i].value & wa_regs[i].mask) ==
> -			(val & wa_regs[i].mask);
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = 2;
> +	gem_execbuf(fd, &execbuf);
>  
> -		igt_debug("0x%05X\t0x%08X\t0x%08X\t0x%08X\t%s\n",
> -			  wa_regs[i].addr, wa_regs[i].value, wa_regs[i].mask,
> -			  val, ok ? "OK" : "FAIL");
> +	gem_set_domain(fd, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
>  
> -		if (write_only(wa_regs[i].addr))
> -			continue;
> +	igt_debug("Address\tval\t\tmask\t\tread\t\tresult\n");
>  
> -		if (!ok) {
> -			igt_warn("0x%05X\t0x%08X\t0x%08X\t0x%08X\t%s\n",
> -				 wa_regs[i].addr, wa_regs[i].value,
> -				 wa_regs[i].mask,
> -				 val, ok ? "OK" : "FAIL");
> +	out = gem_mmap__cpu(fd, obj[0].handle, 0, result_sz, PROT_READ);
> +	for (int i = 0; i < num_wa_regs; i++) {
> +		const bool ok =
> +			(wa_regs[i].value & wa_regs[i].mask) ==
> +			(out[i] & wa_regs[i].mask);
> +		char buf[80];
> +
> +		snprintf(buf, sizeof(buf),
> +			 "0x%05X\t0x%08X\t0x%08X\t0x%08X",
> +			 wa_regs[i].addr, wa_regs[i].value, wa_regs[i].mask,
> +			 out[i]);
> +
> +		if (ok) {
> +			igt_debug("%s\tOK\n", buf);
> +		} else if (write_only(wa_regs[i].addr)) {
> +			igt_debug("%s\tIGNORED (w/o)\n", buf);
> +		} else {
> +			igt_warn("%s\tFAIL\n", buf);
>  			fail_count++;
>  		}
>  	}
> +	munmap(out, result_sz);
> +
> +	gem_close(fd, obj[1].handle);
> +	gem_close(fd, obj[0].handle);
> +	free(reloc);
>  
>  	return fail_count;
>  }
>  
> -static void check_workarounds(enum operation op)
> +static void check_workarounds(int fd, enum operation op)
>  {
> -	igt_assert_eq(workaround_fail_count(), 0);
> +	igt_assert_eq(workaround_fail_count(fd), 0);
>  
>  	switch (op) {
>  	case GPU_RESET:
> -		test_hang_gpu();
> +		igt_force_gpu_reset(fd);

My kbl fails these tests. Do you need some mechanism
to wait until the reset has really happened?

Hmm, the kernel should ensure that the next reading batch
is post-reset, and everything should be fine.

(gem_workarounds:7286) WARNING: 0x024D0 0x00002248      0xFFFFFFFF      0x00002094      FAIL
(gem_workarounds:7286) WARNING: 0x024D4 0x00002580      0xFFFFFFFF      0x00002094      FAIL
(gem_workarounds:7286) WARNING: 0x024D8 0x00007304      0xFFFFFFFF      0x00002094      FAIL

which are fine pre-reset...

-Mika
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx