Thanks to Hawking and Tao for the review. I will post patch v2 to address your comments. Regards, Guchun -----Original Message----- From: Zhou1, Tao <Tao.Zhou1@xxxxxxx> Sent: Tuesday, August 6, 2019 6:07 PM To: Chen, Guchun <Guchun.Chen@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Li, Dennis <Dennis.Li@xxxxxxx>; Pan, Xinhui <Xinhui.Pan@xxxxxxx> Cc: Li, Dennis <Dennis.Li@xxxxxxx>; Chen, Guchun <Guchun.Chen@xxxxxxx> Subject: RE: [PATCH libdrm 2/3] tests/amdgpu/ras: refine ras eject test -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Guchun Chen Sent: 2019年8月6日 15:36 To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Li, Dennis <Dennis.Li@xxxxxxx>; Pan, Xinhui <Xinhui.Pan@xxxxxxx>; Zhou1, Tao <Tao.Zhou1@xxxxxxx> Cc: Li, Dennis <Dennis.Li@xxxxxxx>; Chen, Guchun <Guchun.Chen@xxxxxxx> Subject: [PATCH libdrm 2/3] tests/amdgpu/ras: refine ras eject test The RAS eject test framework does not work with the original code, so refine it to make it work on top of the kernel's RAS support. 
Change-Id: I63ac27707a69133cd08fa0da308f255b1b169c1f Signed-off-by: Dennis Li <dennis.li@xxxxxxx> Signed-off-by: Guchun Chen <guchun.chen@xxxxxxx> --- configure.ac | 18 ++ meson.build | 1 + tests/amdgpu/Makefile.am | 5 +- tests/amdgpu/meson.build | 16 +- tests/amdgpu/ras_tests.c | 520 +++++++++++++++++++++++++++++++++++---- 5 files changed, 509 insertions(+), 51 deletions(-) diff --git a/configure.ac b/configure.ac index 1cf91347..983b4371 100644 --- a/configure.ac +++ b/configure.ac @@ -430,10 +430,24 @@ if test "x$AMDGPU" != xno; then AC_SUBST([CUNIT_CFLAGS]) fi fi + + # Detect json-c library + PKG_CHECK_MODULES([JSONC], [json-c >= 0.10.1], [have_jsonc=yes], [have_jsonc=no]) + if test "x${have_jsonc}" = "xno"; then + AC_CHECK_LIB([json-c], [json_object_object_get], [have_jsonc=yes], [have_jsonc=no]) + if test "x${have_jsonc}" = "xyes"; then + JSONC_LIBS="-ljson-c" + JSONC_CFLAGS="" + AC_SUBST([JSONC_LIBS]) + AC_SUBST([JSONC_CFLAGS]) + fi + fi else have_cunit=no + have_jsonc=no fi AM_CONDITIONAL(HAVE_CUNIT, [test "x$have_cunit" != "xno"]) +AM_CONDITIONAL(HAVE_JSONC, [test "x$have_jsonc" != "xno"]) AM_CONDITIONAL(HAVE_AMDGPU, [test "x$AMDGPU" = xyes]) if test "x$AMDGPU" = xyes; then @@ -442,6 +456,10 @@ if test "x$AMDGPU" = xyes; then if test "x$have_cunit" = "xno"; then AC_MSG_WARN([Could not find cunit library. Disabling amdgpu tests]) fi + + if test "x$have_jsonc" = "xno"; then + AC_MSG_WARN([Could not find json-c library. 
Disabling amdgpu tests]) + fi else AC_DEFINE(HAVE_AMDGPU, 0) fi diff --git a/meson.build b/meson.build index e292554a..bc5cfc58 100644 --- a/meson.build +++ b/meson.build @@ -217,6 +217,7 @@ libdrm_c_args = warn_c_args + ['-fvisibility=hidden'] dep_pciaccess = dependency('pciaccess', version : '>= 0.10', required : with_intel) dep_cunit = dependency('cunit', version : '>= 2.1', required : false) +dep_json = dependency('json-c', version : '>= 0.10.1', required : +false) _cairo_tests = get_option('cairo-tests') if _cairo_tests != 'false' dep_cairo = dependency('cairo', required : _cairo_tests == 'true') diff --git a/tests/amdgpu/Makefile.am b/tests/amdgpu/Makefile.am index 920882d0..339bb0a9 100644 --- a/tests/amdgpu/Makefile.am +++ b/tests/amdgpu/Makefile.am @@ -7,7 +7,8 @@ AM_CFLAGS = \ LDADD = $(top_builddir)/libdrm.la \ $(top_builddir)/amdgpu/libdrm_amdgpu.la \ - $(CUNIT_LIBS) + $(CUNIT_LIBS) \ + $(JSONC_LIBS) if HAVE_INSTALL_TESTS bin_PROGRAMS = \ @@ -17,7 +18,7 @@ noinst_PROGRAMS = \ amdgpu_test endif -amdgpu_test_CPPFLAGS = $(CUNIT_CFLAGS) +amdgpu_test_CPPFLAGS = $(CUNIT_CFLAGS) $(JSONC_CFLAGS) amdgpu_test_SOURCES = \ amdgpu_test.c \ diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build index 1726cb43..4307295e 100644 --- a/tests/amdgpu/meson.build +++ b/tests/amdgpu/meson.build @@ -18,7 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-if dep_cunit.found() +if dep_cunit.found() and dep_json.found() amdgpu_test = executable( 'amdgpu_test', files( @@ -26,9 +26,19 @@ if dep_cunit.found() 'vce_tests.c', 'uvd_enc_tests.c', 'vcn_tests.c', 'deadlock_tests.c', 'vm_tests.c', 'ras_tests.c', 'syncobj_tests.c', ), - dependencies : [dep_cunit, dep_threads], + dependencies : [dep_cunit, dep_json, dep_threads], include_directories : [inc_root, inc_drm, include_directories('../../amdgpu')], link_with : [libdrm, libdrm_amdgpu], install : with_install_tests, ) -endif + + configure_file(input : '../../data/amdgpu_ras.json', + output : 'amdgpu_ras.json', + configuration : configuration_data()) + + install_data( + '../../data/amdgpu_ras.json', + install_mode : 'rw-r--r--', + install_dir : datadir_amdgpu, + ) +endif \ No newline at end of file diff --git a/tests/amdgpu/ras_tests.c b/tests/amdgpu/ras_tests.c index 81c34ad6..c72d53e9 100644 --- a/tests/amdgpu/ras_tests.c +++ b/tests/amdgpu/ras_tests.c @@ -30,6 +30,7 @@ #include <fcntl.h> #include <stdio.h> #include "xf86drm.h" +#include "json.h" const char *ras_block_string[] = { "umc", @@ -72,11 +73,252 @@ enum amdgpu_ras_block { #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) +enum amdgpu_ras_gfx_subblock { + /* CPC */ + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, + AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + /* CPF */ + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, + AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + 
AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + /* CPG */ + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + /* GDS */ + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + /* SPI */ + AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, + /* SQ */ + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, + AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + /* SQC (3 ranges) */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + /* SQC range 0 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, + 
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, + /* TA */ + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + /* TCA */ + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + /* TCC range 0 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, + 
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, + AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, + AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, + AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, + /* TCI */ + AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, + /* TCP */ + 
AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + /* TD */ + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, + AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + /* EA range 0 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + /* EA range 1 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, + 
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, + /* UTC VM L2 bank */ + AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, + /* UTC VM walker */ + AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, + AMDGPU_RAS_BLOCK__GFX_MAX +}; + enum amdgpu_ras_error_type { - AMDGPU_RAS_ERROR__NONE = 0, - AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2, - AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4, - AMDGPU_RAS_ERROR__POISON = 8, + AMDGPU_RAS_ERROR__NONE = 0, + AMDGPU_RAS_ERROR__PARITY = 1, + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2, + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4, + AMDGPU_RAS_ERROR__POISON = 8, +}; + +struct ras_test_item { + char name[64]; + int block; + int sub_block; + char error_type_str[64]; + enum amdgpu_ras_error_type type; + uint64_t address; + uint64_t value; }; struct ras_common_if { @@ -100,8 +342,10 @@ struct ras_debug_if { int op; }; /* for now, only umc, gfx, sdma has implemented. 
*/ -#define DEFAULT_RAS_BLOCK_MASK_INJECT (1 << AMDGPU_RAS_BLOCK__UMC) -#define DEFAULT_RAS_BLOCK_MASK_QUERY (1 << AMDGPU_RAS_BLOCK__UMC) +#define DEFAULT_RAS_BLOCK_MASK_INJECT ((1 << AMDGPU_RAS_BLOCK__UMC) |\ + (1 << AMDGPU_RAS_BLOCK__GFX)) +#define DEFAULT_RAS_BLOCK_MASK_QUERY ((1 << AMDGPU_RAS_BLOCK__UMC) |\ + (1 << AMDGPU_RAS_BLOCK__GFX)) #define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\ (1 << AMDGPU_RAS_BLOCK__SDMA) |\ (1 << AMDGPU_RAS_BLOCK__GFX)) @@ -453,6 +697,32 @@ static int amdgpu_ras_query_err_count(enum amdgpu_ras_block block, return 0; } +static int amdgpu_ras_inject(enum amdgpu_ras_block block, + uint32_t sub_block, enum amdgpu_ras_error_type type, + uint64_t address, uint64_t value) +{ + struct ras_debug_if data = { .op = 2, }; + struct ras_inject_if *inject = &data.inject; + int ret; + + if (amdgpu_ras_is_feature_enabled(block) <= 0) + return -1; [Tao] Maybe a print can be added here to indicate the error reason. + + inject->head.block = block; + inject->head.type = type; + inject->head.sub_block_index = sub_block; + strncpy(inject->head.name, ras_block_str(block), 32); + inject->address = address; + inject->value = value; + + ret = amdgpu_ras_invoke(&data); + CU_ASSERT_EQUAL(ret, 0); + if (ret) + return -1; + + return 0; +} + //tests static void amdgpu_ras_features_test(int enable) { @@ -503,66 +773,224 @@ static void amdgpu_ras_enable_test(void) } } -static void __amdgpu_ras_inject_test(void) +static int _json_get_block_id(json_object *block_obj, const char *name) { - struct ras_debug_if data; - int ret; - int i; - unsigned long ue, ce, ue_old, ce_old; + json_object *item_obj, *index_obj; - data.op = 2; - for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) { - int timeout = 3; - struct ras_inject_if inject = { - .head = { - .block = i, - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - .sub_block_index = 0, - .name = "", - }, - .address = 0, - .value = 0, - }; + if (!json_object_object_get_ex(block_obj, name, &item_obj)) + return -1; - 
if (amdgpu_ras_is_feature_enabled(i) <= 0) - continue; + if (!json_object_object_get_ex(item_obj, "index", &index_obj)) + return -1; - if (!((1 << i) & ras_block_mask_inject)) - continue; + return json_object_get_int(index_obj); } - data.inject = inject; +static int _json_get_subblock_id(json_object *block_obj, const char *block_name, + const char *subblock_name) +{ + json_object *item_obj, *subblock_obj, *name_obj; - ret = amdgpu_ras_query_err_count(i, &ue_old, &ce_old); - CU_ASSERT_EQUAL(ret, 0); + if (!json_object_object_get_ex(block_obj, block_name, &item_obj)) + return -1; - if (ret) - continue; + if (!json_object_object_get_ex(item_obj, "subblock", &subblock_obj)) + return -1; - ret = amdgpu_ras_invoke(&data); + if (!json_object_object_get_ex(subblock_obj, subblock_name, &name_obj)) + return -1; + + return json_object_get_int(name_obj); +} + +static int amdgpu_ras_get_test_items(struct ras_test_item **pitems, int +*size) { + json_object *root_obj = NULL; + json_object *block_obj = NULL; + json_object *type_obj = NULL; + json_object *tests_obj = NULL; + json_object *test_obj = NULL; + json_object *tmp_obj = NULL; + json_object *tmp_type_obj = NULL; + json_object *subblock_obj = NULL; + int i, length; + struct ras_test_item *items = NULL; + int ret = -1; + + root_obj = json_object_from_file("./amdgpu_ras.json"); + if (!root_obj) + root_obj = json_object_from_file( + "/usr/share/libdrm/amdgpu_ras.json"); + + if (!root_obj) { + CU_FAIL_FATAL("Couldn't find amdgpu_ras.json"); + goto pro_end; + } + + /* Check Version */ + if (!json_object_object_get_ex(root_obj, "version", &tmp_obj)) { + CU_FAIL_FATAL("Wrong format of amdgpu_ras.json"); + goto pro_end; + } + + /* Block Definition */ + if (!json_object_object_get_ex(root_obj, "block", &block_obj)) { + fprintf(stderr, "block isn't defined\n"); + goto pro_end; + } + + /* Type Definition */ + if (!json_object_object_get_ex(root_obj, "type", &type_obj)) { + fprintf(stderr, "type isn't defined\n"); + goto pro_end; + } + 
+ /* Enumulate test items */ + if (!json_object_object_get_ex(root_obj, "tests", &tests_obj)) { + fprintf(stderr, "tests are empty\n"); + goto pro_end; + } + + length = json_object_array_length(tests_obj); + + items = malloc(sizeof(struct ras_test_item) * length); + if (!items) { + fprintf(stderr, "malloc failed\n"); + goto pro_end; + } + + for (i = 0; i < length; i++) { + test_obj = json_object_array_get_idx(tests_obj, i); + + /* Name */ + if (!json_object_object_get_ex(test_obj, "name", &tmp_obj)) { + fprintf(stderr, "Test %d has no name\n", i); + goto pro_end; + } + strncpy(items[i].name, json_object_get_string(tmp_obj), 64); + + /* block */ + if (!json_object_object_get_ex(test_obj, "block", &tmp_obj)) { + fprintf(stderr, "Test:%s: block isn't defined\n", + items[i].name); + goto pro_end; + } + items[i].block = _json_get_block_id( + block_obj, json_object_get_string(tmp_obj)); + + /* check block id */ + if (items[i].block < AMDGPU_RAS_BLOCK__UMC || [Tao] AMDGPU_RAS_BLOCK__START can be defined to replace AMDGPU_RAS_BLOCK__UMC + items[i].block >= AMDGPU_RAS_BLOCK__LAST) { + fprintf(stderr, "Test:%s: block id %d is invalid\n", + items[i].name, items[i].block); + goto pro_end; + } + + /* subblock */ + if (json_object_object_get_ex(test_obj, "subblock", &tmp_obj)) { + json_object_object_get_ex(test_obj, "block", + &subblock_obj); + + items[i].sub_block = _json_get_subblock_id( + block_obj, + json_object_get_string(subblock_obj), + json_object_get_string(tmp_obj)); [Tao] It's better to check items[i].sub_block returned by _json_get_subblock_id and add a print. 
+ } else + items[i].sub_block = 0; + + /* type */ + if (json_object_object_get_ex(test_obj, "type", &tmp_obj)) { + strncpy(items[i].error_type_str, + json_object_get_string(tmp_obj), 64); + + if (json_object_object_get_ex(type_obj, + json_object_get_string(tmp_obj), &tmp_type_obj)) + items[i].type = json_object_get_int(tmp_type_obj); + else + items[i].type = (enum amdgpu_ras_error_type)0; + } + + /* address */ + if (json_object_object_get_ex(test_obj, "address", &tmp_obj)) + items[i].address = json_object_get_int(tmp_obj); + else + items[i].address = 0; /* default address 0 */ + + /* value */ + if (json_object_object_get_ex(test_obj, "value", &tmp_obj)) + items[i].value = json_object_get_int(tmp_obj); + else + items[i].value = 0; /* default value 0 */ + } + + *pitems = items; + *size = length; + ret = 0; +pro_end: + if (root_obj) + json_object_put(root_obj); + + return ret; +} + +static void __amdgpu_ras_inject_test(void) { + struct ras_test_item *items = NULL; + int i, size; + int ret; + unsigned long old_ue, old_ce; + unsigned long ue, ce; + int timeout; + bool pass; + + ret = amdgpu_ras_get_test_items(&items, &size); + CU_ASSERT_EQUAL(ret, 0); + if (ret) + goto mem_free; + + printf("...\n"); + for (i = 0; i < size; i++) { + timeout = 3; + pass = false; + + ret = amdgpu_ras_query_err_count(items[i].block, &old_ue, + &old_ce); CU_ASSERT_EQUAL(ret, 0); + if (ret) + break; + ret = amdgpu_ras_inject(items[i].block, items[i].sub_block, + items[i].type, items[i].address, + items[i].value); + CU_ASSERT_EQUAL(ret, 0); if (ret) - continue; + break; -loop: while (timeout > 0) { - ret = amdgpu_ras_query_err_count(i, &ue, &ce); - CU_ASSERT_EQUAL(ret, 0); + sleep(5); + ret = amdgpu_ras_query_err_count(items[i].block, &ue, + &ce); + CU_ASSERT_EQUAL(ret, 0); if (ret) - continue; - if (ue_old != ue) { - /*recovery takes ~10s*/ - sleep(10); break; - } - sleep(1); + if (old_ue != ue || old_ce != ce) { + pass = true; + sleep(20); + break; + } timeout -= 1; } + printf("\t Test 
%s@%s, address %ld, value %ld: %s\n", + items[i].name, items[i].error_type_str, items[i].address, + items[i].value, pass ? "Pass" : "Fail"); + } - CU_ASSERT_EQUAL(ue_old + 1, ue); - CU_ASSERT_EQUAL(ce_old, ce); +mem_free: + if (items) { + free(items); + items = NULL; } } -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx