Re: [RFC 2/2] igt/gem_workarounds: igt to test workaround registers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 08/08/2014 15:12, Daniel Vetter wrote:
On Fri, Aug 08, 2014 at 10:54:56AM +0100, arun.siluvery@xxxxxxxxxxxxxxx wrote:
From: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx>

Some of the workarounds are lost following a gpu reset or suspend/resume;
this patch adds a test which captures register state before and after
the test scenario.

This test currently verifies only bdw workarounds.

Signed-off-by: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx>

Some comments below.

---
  lib/intel_reg.h         |   8 ++
  tests/Makefile.sources  |   1 +
  tests/gem_workarounds.c | 211 ++++++++++++++++++++++++++++++++++++++++++++++++
  3 files changed, 220 insertions(+)
  create mode 100644 tests/gem_workarounds.c

diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index 86175bb..d015c36 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -3628,4 +3628,12 @@ typedef enum {
  #define   GEN6_WIZ_HASHING_16x4			GEN6_WIZ_HASHING(1, 0)
  #define   GEN6_WIZ_HASHING_MASK			(GEN6_WIZ_HASHING(1, 1) << 16)

+#define GAMTARBMODE			0x04a08
+#define _3D_CHICKEN3			0x02090
+#define GAM_ECOCHK			0x4090
+#define CHICKEN_PAR1_1			0x42080
+#define GEN7_FF_THREAD_MODE		0x20a0
+#define GEN6_RC_SLEEP_PSMI_CONTROL	0x2050
+#define GEN8_UCGCTL6			0x9430
+
  #endif /* _I810_REG_H */
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 0eb9369..a17acd1 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -134,6 +134,7 @@ TESTS_progs = \
  	gem_unfence_active_buffers \
  	gem_unref_active_buffers \
  	gem_wait_render_timeout \
+	gem_workarounds \
  	gen3_mixed_blits \
  	gen3_render_linear_blits \
  	gen3_render_mixed_blits \
diff --git a/tests/gem_workarounds.c b/tests/gem_workarounds.c
new file mode 100644
index 0000000..35d1aa7
--- /dev/null
+++ b/tests/gem_workarounds.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *  Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx>
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+
+#include "ioctl_wrappers.h"
+#include "drmtest.h"
+#include "igt_debugfs.h"
+#include "igt_aux.h"
+#include "intel_chipset.h"
+#include "intel_io.h"
+
+int drm_fd;
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+uint32_t devid;
+
+enum operation {
+	GPU_RESET,
+	SUSPEND_RESUME,

The suspend test doesn't seem to be wired up ...

Also I think it would be worth having a module-reload version here too.

Suspend/Resume is not working; the device is not resuming even after the timer has elapsed. Do we know whether suspend/resume works correctly on nightly?

+};
+
+struct workaround {
+	const char *reg_name;
+	uint32_t address;
+};
+
+static struct workaround bdw_workarounds[] =
+{
+	{ "GEN8_ROW_CHICKEN", GEN8_ROW_CHICKEN },
+	{ "GEN7_ROW_CHICKEN2", GEN7_ROW_CHICKEN2 },
+	{ "HALF_SLICE_CHICKEN3", HALF_SLICE_CHICKEN3 },
+	{ "GEN7_HALF_SLICE_CHICKEN1", GEN7_HALF_SLICE_CHICKEN1 },
+	{ "COMMON_SLICE_CHICKEN2", COMMON_SLICE_CHICKEN2 },
+	{ "HDC_CHICKEN0", HDC_CHICKEN0 },
+	{ "GEN7_CACHE_MODE_1", GEN7_CACHE_MODE_1 },
+	{ "GEN7_GT_MODE", GEN7_GT_MODE },
+	{ "GAMTARBMODE", GAMTARBMODE },
+	{ "_3D_CHICKEN3", _3D_CHICKEN3 },
+	{ "GAM_ECOCHK", GAM_ECOCHK },
+	{ "CHICKEN_PAR1_1", CHICKEN_PAR1_1 },
+	{ "GEN7_FF_THREAD_MODE", GEN7_FF_THREAD_MODE },
+	{ "GEN6_RC_SLEEP_PSMI_CONTROL", GEN6_RC_SLEEP_PSMI_CONTROL },
+	{ "GEN8_UCGCTL6", GEN8_UCGCTL6 },
+	{ "NULL", 0xFFFF },
+};

Crazy idea I've just had to validate that all the w/a table here is
up-to-date with the one in the kernel:

- We create a special WA_REG macro in the kernel which we use to wrap all
   registers used in workarounds at the specific use-site (i.e. not in the
   header). So

   I915_WRITE(WA_REG(GEN8_ROW_CHICKEN), ....);

- That macro then adds the register to a table which we can dump through
   debugfs with a file called intel_wa_registers. This happens at runtime.
   This is important since a static list over all platforms might include
   registers which hang some platforms when we read them.

- A special subtest in this test here compares the kernel-provided list
   with the one supplied here and makes sure that all the w/a in the kernel
   list are in the test list, too. Or we just ditch the test list here
   completely, but that might not work for special cases where we only need
   to check some masks ...

Opinions on this? Would this help with maintaining this testcase and
ensuring that it is always up-to-date with the kernel w/a list? I really
want to make sure we get this right, there have been way too many cases where
w/a settings have been lost over resume, runtime pm, ctx switches ...

I will change the implementation to use this macro.
So in this case the table is updated before each use case (reset, suspend/resume, module reload, etc.)? Is it not sufficient to capture the state at the beginning? My understanding is that the w/a state should really stay the same, and we compare the current state (e.g. after reset) to the one at the beginning rather than to the state before reset.

I think it is easier to maintain if we completely remove the workaround list from igt itself; based on the hardware, the macro can populate only those workarounds that are applicable. But you mentioned that this may not work for special cases; could you elaborate on these cases?

regards
Arun

+
+static void test_hang_gpu(void)
+{
+	int retry_count = 30;
+	enum stop_ring_flags flags;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 gem_exec;
+	uint32_t b[2] = {MI_BATCH_BUFFER_END};
+
+	igt_assert(retry_count);
+	igt_set_stop_rings(STOP_RING_DEFAULTS);
+
+	memset(&gem_exec, 0, sizeof(gem_exec));
+	gem_exec.handle = gem_create(drm_fd, 4096);
+	gem_write(drm_fd, gem_exec.handle, 0, b, sizeof(b));
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = (uintptr_t)&gem_exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_len = sizeof(b);
+
+	drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
+
+	while(retry_count--) {
+		flags = igt_get_stop_rings();
+		if (flags == 0)
+			break;
+		printf("gpu hang not yet cleared, retries left %d\n", retry_count);
+		sleep(1);
+	}
+
+	flags = igt_get_stop_rings();
+	if (flags)
+		igt_set_stop_rings(STOP_RING_NONE);
+}
+
+static void test_suspend_resume(void)
+{
+	printf("Suspending the device ...\n");
+	igt_system_suspend_autoresume();
+}
+
+static void capture_wa_state(struct workaround *wa_regs, int num_wa,
+			     unsigned int *reg_values)
+{
+	int i;
+
+	igt_assert(reg_values);
+	intel_register_access_init(intel_get_pci_device(), 0);
+
+	for (i = 0; i < num_wa; ++i)
+		reg_values[i] = intel_register_read(wa_regs[i].address);
+
+	intel_register_access_fini();
+}
+
+static void check_workarounds(struct workaround *wa, enum operation op)
+{
+	int i;
+	int num_wa = 0;
+	unsigned int *before;
+	unsigned int *after;
+	bool fail = false;
+
+	while(wa[num_wa].address != 0xFFFF)
+		num_wa++;
+
+	igt_assert(num_wa);
+
+	before = malloc(num_wa * sizeof(*before));
+	memset(before, 0x00, num_wa * sizeof(*before));
+	capture_wa_state(wa, num_wa, before);
+
+	switch (op) {
+	case GPU_RESET:
+		test_hang_gpu();
+		break;
+
+	case SUSPEND_RESUME:
+		test_suspend_resume();
+		break;
+
+	default:
+		fail = true;
+		goto out;
+	}
+
+	after = malloc(num_wa * sizeof(*after));
+	memset(after, 0x00, num_wa * sizeof(*after));
+	capture_wa_state(wa, num_wa, after);
+
+	for (i = 0; i < num_wa; ++i) {
+		if (before[i] != after[i]) {
+			fail = true;
+			printf("%s workaround failed, before: 0x%08X, after: 0x%08X\n",
+			       wa[i].reg_name, before[i], after[i]);
+		}
+	}
+
+	free(after);
+
+out:
+	free(before);
+
+	igt_assert(fail == false);
+}
+
+int main(int argc, char **argv)
+{
+	igt_subtest_init(argc, argv);
+
+	igt_fixture {
+		drm_fd = drm_open_any();
+
+		bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+		devid = intel_get_drm_devid(drm_fd);
+		batch = intel_batchbuffer_alloc(bufmgr, devid);
+	}
+
+	igt_subtest("check-workaround-data-after-reset") {
+		if (IS_BROADWELL(devid))

The logic here should be switched around, or at least if you don't have a
w/a table for a given platform we should skip the test. And tbh for
anything gen8+ we should fail it so that someone knows there's still work
to do.
+			check_workarounds(&bdw_workarounds[0], GPU_RESET);

A simple

		else
			igt_skip_on("No w/a table found!\n");

here should do the trick.

+	}
+
+
+	close(drm_fd);
+	igt_exit();
+}
--
2.0.4


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx



_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux