RE: [PATCH libdrm 2/3] tests/amdgpu/ras: refine ras eject test

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Thanks for Hawking's and Tao's review.
I will post patch v2 to address your comments.

Regards,
Guchun

-----Original Message-----
From: Zhou1, Tao <Tao.Zhou1@xxxxxxx> 
Sent: Tuesday, August 6, 2019 6:07 PM
To: Chen, Guchun <Guchun.Chen@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Li, Dennis <Dennis.Li@xxxxxxx>; Pan, Xinhui <Xinhui.Pan@xxxxxxx>
Cc: Li, Dennis <Dennis.Li@xxxxxxx>; Chen, Guchun <Guchun.Chen@xxxxxxx>
Subject: RE: [PATCH libdrm 2/3] tests/amdgpu/ras: refine ras eject test



-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Guchun Chen
Sent: 2019年8月6日 15:36
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Li, Dennis <Dennis.Li@xxxxxxx>; Pan, Xinhui <Xinhui.Pan@xxxxxxx>; Zhou1, Tao <Tao.Zhou1@xxxxxxx>
Cc: Li, Dennis <Dennis.Li@xxxxxxx>; Chen, Guchun <Guchun.Chen@xxxxxxx>
Subject: [PATCH libdrm 2/3] tests/amdgpu/ras: refine ras eject test

The RAS eject test framework does not work with the original code, so refine it to work on top of the kernel RAS support.

Change-Id: I63ac27707a69133cd08fa0da308f255b1b169c1f
Signed-off-by: Dennis Li <dennis.li@xxxxxxx>
Signed-off-by: Guchun Chen <guchun.chen@xxxxxxx>
---
 configure.ac             |  18 ++
 meson.build              |   1 +
 tests/amdgpu/Makefile.am |   5 +-
 tests/amdgpu/meson.build |  16 +-
 tests/amdgpu/ras_tests.c | 520 +++++++++++++++++++++++++++++++++++----
 5 files changed, 509 insertions(+), 51 deletions(-)

diff --git a/configure.ac b/configure.ac index 1cf91347..983b4371 100644
--- a/configure.ac
+++ b/configure.ac
@@ -430,10 +430,24 @@ if test "x$AMDGPU" != xno; then
 			AC_SUBST([CUNIT_CFLAGS])
 		fi
 	fi
+
+	# Detect json-c library
+	PKG_CHECK_MODULES([JSONC], [json-c >= 0.10.1], [have_jsonc=yes], [have_jsonc=no])
+	if test "x${have_jsonc}" = "xno"; then
+		AC_CHECK_LIB([json-c], [json_object_object_get], [have_jsonc=yes], [have_jsonc=no])
+		if test "x${have_jsonc}" = "xyes"; then
+			JSONC_LIBS="-ljson-c"
+			JSONC_CFLAGS=""
+			AC_SUBST([JSONC_LIBS])
+			AC_SUBST([JSONC_CFLAGS])
+		fi
+	fi
 else
 	have_cunit=no
+	have_jsonc=no
 fi
 AM_CONDITIONAL(HAVE_CUNIT, [test "x$have_cunit" != "xno"])
+AM_CONDITIONAL(HAVE_JSONC, [test "x$have_jsonc" != "xno"])
 
 AM_CONDITIONAL(HAVE_AMDGPU, [test "x$AMDGPU" = xyes])  if test "x$AMDGPU" = xyes; then @@ -442,6 +456,10 @@ if test "x$AMDGPU" = xyes; then
 	if test "x$have_cunit" = "xno"; then
 		AC_MSG_WARN([Could not find cunit library. Disabling amdgpu tests])
 	fi
+
+	if test "x$have_jsonc" = "xno"; then
+		AC_MSG_WARN([Could not find json-c library. Disabling amdgpu tests])
+	fi
 else
 	AC_DEFINE(HAVE_AMDGPU, 0)
 fi
diff --git a/meson.build b/meson.build
index e292554a..bc5cfc58 100644
--- a/meson.build
+++ b/meson.build
@@ -217,6 +217,7 @@ libdrm_c_args = warn_c_args + ['-fvisibility=hidden']
 
 dep_pciaccess = dependency('pciaccess', version : '>= 0.10', required : with_intel)  dep_cunit = dependency('cunit', version : '>= 2.1', required : false)
+dep_json = dependency('json-c', version : '>= 0.10.1', required : 
+false)
 _cairo_tests = get_option('cairo-tests')  if _cairo_tests != 'false'
   dep_cairo = dependency('cairo', required : _cairo_tests == 'true') diff --git a/tests/amdgpu/Makefile.am b/tests/amdgpu/Makefile.am index 920882d0..339bb0a9 100644
--- a/tests/amdgpu/Makefile.am
+++ b/tests/amdgpu/Makefile.am
@@ -7,7 +7,8 @@ AM_CFLAGS = \
 
 LDADD = $(top_builddir)/libdrm.la \
 	$(top_builddir)/amdgpu/libdrm_amdgpu.la \
-	$(CUNIT_LIBS)
+	$(CUNIT_LIBS) \
+	$(JSONC_LIBS)
 
 if HAVE_INSTALL_TESTS
 bin_PROGRAMS = \
@@ -17,7 +18,7 @@ noinst_PROGRAMS = \
 	amdgpu_test
 endif
 
-amdgpu_test_CPPFLAGS = $(CUNIT_CFLAGS)
+amdgpu_test_CPPFLAGS = $(CUNIT_CFLAGS) $(JSONC_CFLAGS)
 
 amdgpu_test_SOURCES = \
 	amdgpu_test.c \
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build index 1726cb43..4307295e 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -18,7 +18,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  # SOFTWARE.
 
-if dep_cunit.found()
+if dep_cunit.found() and dep_json.found()
   amdgpu_test = executable(
     'amdgpu_test',
     files(
@@ -26,9 +26,19 @@ if dep_cunit.found()
       'vce_tests.c', 'uvd_enc_tests.c', 'vcn_tests.c', 'deadlock_tests.c',
       'vm_tests.c', 'ras_tests.c', 'syncobj_tests.c',
     ),
-    dependencies : [dep_cunit, dep_threads],
+    dependencies : [dep_cunit, dep_json, dep_threads],
     include_directories : [inc_root, inc_drm, include_directories('../../amdgpu')],
     link_with : [libdrm, libdrm_amdgpu],
     install : with_install_tests,
   )
-endif
+
+  configure_file(input : '../../data/amdgpu_ras.json',
+    output : 'amdgpu_ras.json',
+    configuration : configuration_data())
+
+  install_data(
+    '../../data/amdgpu_ras.json',
+    install_mode : 'rw-r--r--',
+    install_dir : datadir_amdgpu,
+  )
+endif
\ No newline at end of file
diff --git a/tests/amdgpu/ras_tests.c b/tests/amdgpu/ras_tests.c index 81c34ad6..c72d53e9 100644
--- a/tests/amdgpu/ras_tests.c
+++ b/tests/amdgpu/ras_tests.c
@@ -30,6 +30,7 @@
 #include <fcntl.h>
 #include <stdio.h>
 #include "xf86drm.h"
+#include "json.h"
 
 const char *ras_block_string[] = {
 	"umc",
@@ -72,11 +73,252 @@ enum amdgpu_ras_block {  #define AMDGPU_RAS_BLOCK_COUNT  AMDGPU_RAS_BLOCK__LAST
 #define AMDGPU_RAS_BLOCK_MASK   ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
 
+enum amdgpu_ras_gfx_subblock {
+	/* CPC */
+	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+	AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
+		AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
+	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
+	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
+	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
+	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
+	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
+	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
+		AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+	/* CPF */
+	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
+		AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
+	AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+	/* CPG */
+	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
+		AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
+	AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+	/* GDS */
+	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
+	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
+	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
+	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
+		AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+	/* SPI */
+	AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
+	/* SQ */
+	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
+	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
+	AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+	/* SQC (3 ranges) */
+	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+	/* SQC range 0 */
+	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
+		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
+		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
+		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+	/* SQC range 1 */
+	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
+		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
+		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+	/* SQC range 2 */
+	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
+		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
+	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
+		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
+		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
+	/* TA */
+	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
+		AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
+	AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
+	AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
+	AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+	/* TCA */
+	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
+		AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
+		AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+	/* TCC (5 sub-ranges) */
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+	/* TCC range 0 */
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
+		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
+		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
+	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
+	AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
+	AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
+		AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+	/* TCC range 1 */
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
+		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
+		AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+	/* TCC range 2 */
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
+		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
+	AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
+	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
+	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
+	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
+		AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+	/* TCC range 3 */
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
+		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
+		AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+	/* TCC range 4 */
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
+		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+	AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
+		AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
+		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
+	/* TCI */
+	AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
+	/* TCP */
+	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
+		AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
+	AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
+	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
+	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
+		AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+	/* TD */
+	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
+		AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
+	AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+	/* EA (3 sub-ranges) */
+	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+	/* EA range 0 */
+	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
+		AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
+		AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
+	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
+		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+	/* EA range 1 */
+	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
+		AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
+		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+	/* EA range 2 */
+	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
+		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
+		AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
+		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
+	/* UTC VM L2 bank */
+	AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
+	/* UTC VM walker */
+	AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
+	/* UTC ATC L2 2MB cache */
+	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
+	/* UTC ATC L2 4KB cache */
+	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
+	AMDGPU_RAS_BLOCK__GFX_MAX
+};
+
 enum amdgpu_ras_error_type {
-	AMDGPU_RAS_ERROR__NONE				= 0,
-	AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE		= 2,
-	AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE		= 4,
-	AMDGPU_RAS_ERROR__POISON			= 8,
+	AMDGPU_RAS_ERROR__NONE					= 0,
+	AMDGPU_RAS_ERROR__PARITY				= 1,
+	AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE			= 2,
+	AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE			= 4,
+	AMDGPU_RAS_ERROR__POISON				= 8,
+};
+
+struct ras_test_item {
+	char name[64];
+	int block;
+	int sub_block;
+	char error_type_str[64];
+	enum amdgpu_ras_error_type type;
+	uint64_t address;
+	uint64_t value;
 };
 
 struct ras_common_if {
@@ -100,8 +342,10 @@ struct ras_debug_if {
 	int op;
 };
 /* for now, only umc, gfx, sdma has implemented. */ -#define DEFAULT_RAS_BLOCK_MASK_INJECT (1 << AMDGPU_RAS_BLOCK__UMC) -#define DEFAULT_RAS_BLOCK_MASK_QUERY (1 << AMDGPU_RAS_BLOCK__UMC)
+#define DEFAULT_RAS_BLOCK_MASK_INJECT ((1 << AMDGPU_RAS_BLOCK__UMC) |\
+		(1 << AMDGPU_RAS_BLOCK__GFX))
+#define DEFAULT_RAS_BLOCK_MASK_QUERY ((1 << AMDGPU_RAS_BLOCK__UMC) |\
+		(1 << AMDGPU_RAS_BLOCK__GFX))
 #define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\
 		(1 << AMDGPU_RAS_BLOCK__SDMA) |\
 		(1 << AMDGPU_RAS_BLOCK__GFX))
@@ -453,6 +697,32 @@ static int amdgpu_ras_query_err_count(enum amdgpu_ras_block block,
 	return 0;
 }
 
+static int amdgpu_ras_inject(enum amdgpu_ras_block block,
+		uint32_t sub_block, enum amdgpu_ras_error_type type,
+		uint64_t address, uint64_t value)
+{
+	struct ras_debug_if data = { .op = 2, };
+	struct ras_inject_if *inject = &data.inject;
+	int ret;
+
+	if (amdgpu_ras_is_feature_enabled(block) <= 0)
+		return -1;
[Tao] Maybe a print can be added here to indicate the error reason.

+
+	inject->head.block = block;
+	inject->head.type = type;
+	inject->head.sub_block_index = sub_block;
+	strncpy(inject->head.name, ras_block_str(block), 32);
+	inject->address = address;
+	inject->value = value;
+
+	ret = amdgpu_ras_invoke(&data);
+	CU_ASSERT_EQUAL(ret, 0);
+	if (ret)
+		return -1;
+
+	return 0;
+}
+
 //tests
 static void amdgpu_ras_features_test(int enable)  { @@ -503,66 +773,224 @@ static void amdgpu_ras_enable_test(void)
 	}
 }
 
-static void __amdgpu_ras_inject_test(void)
+static int _json_get_block_id(json_object *block_obj, const char *name)
 {
-	struct ras_debug_if data;
-	int ret;
-	int i;
-	unsigned long ue, ce, ue_old, ce_old;
+	json_object *item_obj, *index_obj;
 
-	data.op = 2;
-	for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
-		int timeout = 3;
-		struct ras_inject_if inject = {
-			.head = {
-				.block = i,
-				.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
-				.sub_block_index = 0,
-				.name = "",
-			},
-			.address = 0,
-			.value = 0,
-		};
+	if (!json_object_object_get_ex(block_obj, name, &item_obj))
+		return -1;
 
-		if (amdgpu_ras_is_feature_enabled(i) <= 0)
-			continue;
+	if (!json_object_object_get_ex(item_obj, "index", &index_obj))
+		return -1;
 
-		if (!((1 << i) & ras_block_mask_inject))
-			continue;
+	return json_object_get_int(index_obj); }
 
-		data.inject = inject;
+static int _json_get_subblock_id(json_object *block_obj, const char *block_name,
+				 const char *subblock_name)
+{
+	json_object *item_obj, *subblock_obj, *name_obj;
 
-		ret = amdgpu_ras_query_err_count(i, &ue_old, &ce_old);
-		CU_ASSERT_EQUAL(ret, 0);
+	if (!json_object_object_get_ex(block_obj, block_name, &item_obj))
+		return -1;
 
-		if (ret)
-			continue;
+	if (!json_object_object_get_ex(item_obj, "subblock", &subblock_obj))
+		return -1;
 
-		ret = amdgpu_ras_invoke(&data);
+	if (!json_object_object_get_ex(subblock_obj, subblock_name, &name_obj))
+		return -1;
+
+	return json_object_get_int(name_obj);
+}
+
+static int amdgpu_ras_get_test_items(struct ras_test_item **pitems, int
+*size) {
+	json_object *root_obj = NULL;
+	json_object *block_obj = NULL;
+	json_object *type_obj = NULL;
+	json_object *tests_obj = NULL;
+	json_object *test_obj = NULL;
+	json_object *tmp_obj = NULL;
+	json_object *tmp_type_obj = NULL;
+	json_object *subblock_obj = NULL;
+	int i, length;
+	struct ras_test_item *items = NULL;
+	int ret = -1;
+
+	root_obj = json_object_from_file("./amdgpu_ras.json");
+	if (!root_obj)
+		root_obj = json_object_from_file(
+			"/usr/share/libdrm/amdgpu_ras.json");
+
+	if (!root_obj) {
+		CU_FAIL_FATAL("Couldn't find amdgpu_ras.json");
+		goto pro_end;
+	}
+
+	/* Check Version */
+	if (!json_object_object_get_ex(root_obj, "version", &tmp_obj)) {
+		CU_FAIL_FATAL("Wrong format of amdgpu_ras.json");
+		goto pro_end;
+	}
+
+	/* Block Definition */
+	if (!json_object_object_get_ex(root_obj, "block", &block_obj)) {
+		fprintf(stderr, "block isn't defined\n");
+		goto pro_end;
+	}
+
+	/* Type Definition */
+	if (!json_object_object_get_ex(root_obj, "type", &type_obj)) {
+		fprintf(stderr, "type isn't defined\n");
+		goto pro_end;
+	}
+
+	/* Enumerate test items */
+	if (!json_object_object_get_ex(root_obj, "tests", &tests_obj)) {
+		fprintf(stderr, "tests are empty\n");
+		goto pro_end;
+	}
+
+	length = json_object_array_length(tests_obj);
+
+	items = malloc(sizeof(struct ras_test_item) * length);
+	if (!items) {
+		fprintf(stderr, "malloc failed\n");
+		goto pro_end;
+	}
+
+	for (i = 0; i < length; i++) {
+		test_obj = json_object_array_get_idx(tests_obj, i);
+
+		/* Name */
+		if (!json_object_object_get_ex(test_obj, "name", &tmp_obj)) {
+			fprintf(stderr, "Test %d has no name\n", i);
+			goto pro_end;
+		}
+		strncpy(items[i].name, json_object_get_string(tmp_obj), 64);
+
+		/* block */
+		if (!json_object_object_get_ex(test_obj, "block", &tmp_obj)) {
+			fprintf(stderr, "Test:%s: block isn't defined\n",
+				items[i].name);
+			goto pro_end;
+		}
+		items[i].block = _json_get_block_id(
+			block_obj, json_object_get_string(tmp_obj));
+
+		/* check block id */
+		if (items[i].block < AMDGPU_RAS_BLOCK__UMC ||
[Tao] AMDGPU_RAS_BLOCK__START can be defined to replace AMDGPU_RAS_BLOCK__UMC

+		    items[i].block >= AMDGPU_RAS_BLOCK__LAST) {
+			fprintf(stderr, "Test:%s: block id %d is invalid\n",
+				items[i].name, items[i].block);
+			goto pro_end;
+		}
+
+		/* subblock */
+		if (json_object_object_get_ex(test_obj, "subblock", &tmp_obj)) {
+			json_object_object_get_ex(test_obj, "block",
+				&subblock_obj);
+
+			items[i].sub_block = _json_get_subblock_id(
+				block_obj,
+				json_object_get_string(subblock_obj),
+				json_object_get_string(tmp_obj));
[Tao] It's better to check items[i].sub_block returned by _json_get_subblock_id and add a print.

+		} else
+			items[i].sub_block = 0;
+
+		/* type */
+		if (json_object_object_get_ex(test_obj, "type", &tmp_obj)) {
+			strncpy(items[i].error_type_str,
+				json_object_get_string(tmp_obj), 64);
+
+			if (json_object_object_get_ex(type_obj,
+				json_object_get_string(tmp_obj), &tmp_type_obj))
+				items[i].type = json_object_get_int(tmp_type_obj);
+			else
+				items[i].type = (enum amdgpu_ras_error_type)0;
+		}
+
+		/* address */
+		if (json_object_object_get_ex(test_obj, "address", &tmp_obj))
+			items[i].address = json_object_get_int(tmp_obj);
+		else
+			items[i].address = 0; /* default address 0 */
+
+		/* value */
+		if (json_object_object_get_ex(test_obj, "value", &tmp_obj))
+			items[i].value = json_object_get_int(tmp_obj);
+		else
+			items[i].value = 0; /* default value 0 */
+	}
+
+	*pitems = items;
+	*size = length;
+	ret = 0;
+pro_end:
+	if (root_obj)
+		json_object_put(root_obj);
+
+	return ret;
+}
+
+static void __amdgpu_ras_inject_test(void) {
+	struct ras_test_item *items = NULL;
+	int i, size;
+	int ret;
+	unsigned long old_ue, old_ce;
+	unsigned long ue, ce;
+	int timeout;
+	bool pass;
+
+	ret = amdgpu_ras_get_test_items(&items, &size);
+	CU_ASSERT_EQUAL(ret, 0);
+	if (ret)
+		goto mem_free;
+
+	printf("...\n");
+	for (i = 0; i < size; i++) {
+		timeout = 3;
+		pass = false;
+
+		ret = amdgpu_ras_query_err_count(items[i].block, &old_ue,
+						 &old_ce);
 		CU_ASSERT_EQUAL(ret, 0);
+		if (ret)
+			break;
 
+		ret = amdgpu_ras_inject(items[i].block, items[i].sub_block,
+					items[i].type, items[i].address,
+					items[i].value);
+		CU_ASSERT_EQUAL(ret, 0);
 		if (ret)
-			continue;
+			break;
 
-loop:
 		while (timeout > 0) {
-			ret = amdgpu_ras_query_err_count(i, &ue, &ce);
-			CU_ASSERT_EQUAL(ret, 0);
+			sleep(5);
 
+			ret = amdgpu_ras_query_err_count(items[i].block, &ue,
+							 &ce);
+			CU_ASSERT_EQUAL(ret, 0);
 			if (ret)
-				continue;
-			if (ue_old != ue) {
-				/*recovery takes ~10s*/
-				sleep(10);
 				break;
-			}
 
-			sleep(1);
+			if (old_ue != ue || old_ce != ce) {
+				pass = true;
+				sleep(20);
+				break;
+			}
 			timeout -= 1;
 		}
+		printf("\t Test %s@%s, address %ld, value %ld: %s\n",
+			items[i].name, items[i].error_type_str, items[i].address,
+			items[i].value,	pass ? "Pass" : "Fail");
+	}
 
-		CU_ASSERT_EQUAL(ue_old + 1, ue);
-		CU_ASSERT_EQUAL(ce_old, ce);
+mem_free:
+	if (items) {
+		free(items);
+		items = NULL;
 	}
 }
 
--
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux