On Mon, Jul 24, 2017 at 11:25 AM, Tom St Denis <tom.stdenis at amd.com> wrote: > This patch involves three things I was working on at once so the > patches are a bit intertwined. It adds > > 1. The ability to halt SQ waves when reading waves on CIK and later > ASICs. > > 2. The ability to enable verbose decoding when reading vram > > 3. The ability to decode virtual addresses in the mmhub (for say VCN) > > Signed-off-by: Tom St Denis <tom.stdenis at amd.com> Series is: Acked-by: Alex Deucher <alexander.deucher at amd.com> > --- > doc/umr.1 | 6 +++ > src/app/main.c | 15 +++++-- > src/app/print_waves.c | 6 +++ > src/lib/CMakeLists.txt | 1 + > src/lib/dump_ib.c | 6 ++- > src/lib/find_reg.c | 11 ++++- > src/lib/mmio.c | 36 +++++++++++++---- > src/lib/read_vram.c | 98 ++++++++++++++++++++++++++++++++++++++------- > src/lib/ring_decode.c | 4 +- > src/lib/sq_cmd_halt_waves.c | 57 ++++++++++++++++++++++++++ > src/umr.h | 26 ++++++++++++ > 11 files changed, 233 insertions(+), 33 deletions(-) > create mode 100644 src/lib/sq_cmd_halt_waves.c > > diff --git a/doc/umr.1 b/doc/umr.1 > index 4c720ba48840..da432a13b0d1 100644 > --- a/doc/umr.1 > +++ b/doc/umr.1 > @@ -148,6 +148,12 @@ separated strings. Options should be specified before --update or --force comma > be used only if the KMD is hung or otherwise not working correctly. Using it on live systems > may result in race conditions. > > +.B verbose > + Enable verbose diagnostics (used in --vram). > + > +.B halt_waves > + Halt/resume all waves while reading wave status. > + > .SH "Notes" > > - The "Waves" field in the DRM section of --top only works if GFX PG has been disabled. 
Otherwise, > diff --git a/src/app/main.c b/src/app/main.c > index 6e0bc57200b0..7e3914155a22 100644 > --- a/src/app/main.c > +++ b/src/app/main.c > @@ -107,6 +107,10 @@ static void parse_options(char *str) > options.quiet = 1; > } else if (!strcmp(option, "follow_ib")) { > options.follow_ib = 1; > + } else if (!strcmp(option, "verbose")) { > + options.verbose = 1; > + } else if (!strcmp(option, "halt_waves")) { > + options.halt_waves = 1; > } else if (!strcmp(option, "no_kernel")) { > options.no_kernel = 1; > options.use_pci = 1; > @@ -422,12 +426,15 @@ int main(int argc, char **argv) > "\n\t--top, -t\n\t\tSummarize GPU utilization. Can select a SE block with --bank. Can use" > "\n\t\toptions 'use_colour' to colourize output and 'use_pci' to improve efficiency.\n" > "\n\t--waves, -wa\n\t\tPrint out information about any active CU waves. Can use '-O bits'" > - "\n\t\tto see decoding of various wave fields.\n" > + "\n\t\tto see decoding of various wave fields. Can use the '-O halt_waves' option" > + "\n\t\tto halt the SQ while reading registers.\n" > "\n\t--vram, -v [<vmid>@]<address> <size>" > "\n\t\tRead 'size' bytes (in hex) from a given address (in hex) to stdout. Optionally" > - "\n\t\tspecify the VMID (in decimal) treating the address as a virtual address instead.\n" > -"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: risky, bits, bitsfull, empty_log, follow, named, many," > - "\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel.\n" > + "\n\t\tspecify the VMID (in decimal or in hex with a '0x' prefix) treating the address" > + "\n\t\tas a virtual address instead. 
Can use 'verbose' option to print out PDE/PTE" > + "\n\t\tdecodings.\n" > +"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: bits, bitsfull, empty_log, follow, named, many," > + "\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel, verbose, halt_waves.\n" > "\n\n", UMR_BUILD_VER, UMR_BUILD_REV); > exit(EXIT_SUCCESS); > } else { > diff --git a/src/app/print_waves.c b/src/app/print_waves.c > index e157db9f9386..1efd8a13bd28 100644 > --- a/src/app/print_waves.c > +++ b/src/app/print_waves.c > @@ -40,6 +40,9 @@ void umr_print_waves(struct umr_asic *asic) > struct umr_wave_status ws; > int first = 1, col = 0; > > + if (asic->options.halt_waves) > + umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT); > + > if (asic->family <= FAMILY_CIK) > shift = 3; // on SI..CIK allocations were done in 8-dword blocks > else > @@ -206,4 +209,7 @@ void umr_print_waves(struct umr_asic *asic) > } > if (first) > printf("No active waves!\n"); > + > + if (asic->options.halt_waves) > + umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME); > } > diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt > index 217ae80cdfd7..fcb4f9c9dc80 100644 > --- a/src/lib/CMakeLists.txt > +++ b/src/lib/CMakeLists.txt > @@ -21,6 +21,7 @@ add_library(umrcore STATIC > read_vram.c > ring_decode.c > scan_config.c > + sq_cmd_halt_waves.c > transfer_soc15.c > wave_status.c > update.c > diff --git a/src/lib/dump_ib.c b/src/lib/dump_ib.c > index 4e81dbe3eb09..cba497373fe2 100644 > --- a/src/lib/dump_ib.c > +++ b/src/lib/dump_ib.c > @@ -28,9 +28,11 @@ > void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder) > { > uint32_t *data = NULL, x; > + static const char *hubs[] = { "gfxhub", "mmhub" }; > > - printf("Dumping IB at VMID:%u 0x%llx of %u words\n", > - (unsigned)decoder->next_ib_info.vmid, > + printf("Dumping IB at (%s) VMID:%u 0x%llx of %u words\n", > + hubs[decoder->next_ib_info.vmid >> 8], > + (unsigned)decoder->next_ib_info.vmid & 0xFF, > (unsigned long 
long)decoder->next_ib_info.ib_addr, > (unsigned)decoder->next_ib_info.size/4); > > diff --git a/src/lib/find_reg.c b/src/lib/find_reg.c > index d4647163ea63..ecd7f132c9c9 100644 > --- a/src/lib/find_reg.c > +++ b/src/lib/find_reg.c > @@ -36,14 +36,21 @@ uint32_t umr_find_reg(struct umr_asic *asic, char *regname) > return 0xFFFFFFFF; > } > > -struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname) > +struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname) > { > int i, j; > > - for (i = 0; i < asic->no_blocks; i++) > + for (i = 0; i < asic->no_blocks; i++) { > + if (ip && memcmp(asic->blocks[i]->ipname, ip, strlen(ip))) continue; > for (j = 0; j < asic->blocks[i]->no_regs; j++) > if (!strcmp(asic->blocks[i]->regs[j].regname, regname)) > return &asic->blocks[i]->regs[j]; > + } > fprintf(stderr, "[BUG]: reg [%s] not found on asic [%s]\n", regname, asic->asicname); > return NULL; > } > + > +struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname) > +{ > + return umr_find_reg_data_by_ip(asic, NULL, regname); > +} > diff --git a/src/lib/mmio.c b/src/lib/mmio.c > index 47e5150d3201..eb91e289404f 100644 > --- a/src/lib/mmio.c > +++ b/src/lib/mmio.c > @@ -145,26 +145,36 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg > return 0; > } > > -uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name) > +uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name) > { > struct umr_reg *reg; > - reg = umr_find_reg_data(asic, name); > + reg = umr_find_reg_data_by_ip(asic, ip, name); > if (reg) > return umr_read_reg(asic, reg->addr * (reg->type == REG_MMIO ? 
4 : 1), reg->type); > else > return 0; > } > > -int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value) > +uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name) > +{ > + return umr_read_reg_by_name_by_ip(asic, NULL, name); > +} > + > +int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value) > { > struct umr_reg *reg; > - reg = umr_find_reg_data(asic, name); > + reg = umr_find_reg_data_by_ip(asic, ip, name); > if (reg) > return umr_write_reg(asic, reg->addr * (reg->type == REG_MMIO ? 4 : 1), value, reg->type); > else > return -1; > } > > +int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value) > +{ > + return umr_write_reg_by_name_by_ip(asic, NULL, name, value); > +} > + > uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue) > { > int i; > @@ -193,26 +203,36 @@ uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg, > return 0; > } > > -uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue) > +uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue) > { > struct umr_reg *reg; > - reg = umr_find_reg_data(asic, regname); > + reg = umr_find_reg_data_by_ip(asic, ip, regname); > if (reg) > return umr_bitslice_reg(asic, reg, bitname, regvalue); > else > return 0; > } > > -uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue) > +uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue) > +{ > + return umr_bitslice_reg_by_name_by_ip(asic, NULL, regname, bitname, regvalue); > +} > + > +uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue) > { > struct umr_reg *reg; > - reg = umr_find_reg_data(asic, regname); > + reg 
= umr_find_reg_data_by_ip(asic, ip, regname); > if (reg) > return umr_bitslice_compose_value(asic, reg, bitname, regvalue); > else > return 0; > } > > +uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue) > +{ > + return umr_bitslice_compose_value_by_name_by_ip(asic, NULL, regname, bitname, regvalue); > +} > + > int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance) > { > struct umr_reg *grbm_idx; > diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c > index 3d458db8fa11..b8034372b280 100644 > --- a/src/lib/read_vram.c > +++ b/src/lib/read_vram.c > @@ -135,7 +135,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre > sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR", (int)vmid); > page_table_base_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12; > > - vm_fb_base = ((uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION") >> 16) << 24; > + vm_fb_base = ((uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION") & 0xFFFF) << 24; > > DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr); > DEBUG("mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr); > @@ -143,6 +143,21 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre > DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth); > DEBUG("mmMC_VM_FB_LOCATION == %llx\n", (unsigned long long)vm_fb_base); > > + if (asic->options.verbose) > + fprintf(stderr, "[VERBOSE]: Decoding %u at 0x%llx\n" > + "[VERBOSE]: PAGE_TABLE_START_ADDR=0x%llx\n" > + "[VERBOSE]: PAGE_TABLE_BASE_ADDR=0x%llx\n" > + "[VERBOSE]: PAGE_TABLE_BLOCK_SIZE=%u\n" > + "[VERBOSE]: PAGE_TABLE_DEPTH=%u\n" > + "[VERBOSE]: MC_VM_FB_LOCATION=0x%llx\n", > + (unsigned)vmid, > + (unsigned long long)address, > + (unsigned long long)page_table_start_addr, > + (unsigned long 
long)page_table_base_addr, > + (unsigned)page_table_size, > + (unsigned)page_table_depth, > + (unsigned long long)vm_fb_base); > + > address -= page_table_start_addr; > > while (size) { > @@ -152,17 +167,21 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre > pte_idx = (address >> 12) & ((1ULL << (9 + page_table_size)) - 1); > > // read PDE entry > - umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry); > + umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pde_idx * 8 - vm_fb_base, 8, &pde_entry); > > // decode PDE values > pde_fields.frag_size = (pde_entry >> 59) & 0x1F; > pde_fields.pte_base_addr = pde_entry & 0xFFFFFFF000ULL; > pde_fields.valid = pde_entry & 1; > + if (asic->options.verbose) > + fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d\n", > + (unsigned long long)pde_fields.pte_base_addr, > + (int)pde_fields.valid); > DEBUG("PDE==%llx, pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n", > (unsigned long long)pde_entry, (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid); > > // now read PTE entry for this page > - if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0) > + if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8 - vm_fb_base, 8, &pte_entry) < 0) > return -1; > > // decode PTE values > @@ -170,6 +189,11 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre > pte_fields.fragment = (pte_entry >> 7) & 0x1F; > pte_fields.system = (pte_entry >> 1) & 1; > pte_fields.valid = pte_entry & 1; > + if (asic->options.verbose) > + fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n", > + (unsigned long long)pte_fields.page_base_addr, > + (int)pte_fields.system, > + (int)pte_fields.valid); > DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, 
system=%d, valid=%d\n", > (unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid); > > @@ -179,7 +203,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre > // depth == 0 == PTE only > pte_idx = (address >> 12); > > - if (umr_read_vram(asic, 0xFFFF, page_table_base_addr + pte_idx * 8, 8, &pte_entry) < 0) > + if (umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pte_idx * 8 - vm_fb_base, 8, &pte_entry) < 0) > return -1; > > // decode PTE values > @@ -187,6 +211,11 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre > pte_fields.fragment = (pte_entry >> 7) & 0x1F; > pte_fields.system = (pte_entry >> 1) & 1; > pte_fields.valid = pte_entry & 1; > + if (asic->options.verbose) > + fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n", > + (unsigned long long)pte_fields.page_base_addr, > + (int)pte_fields.system, > + (int)pte_fields.valid); > DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid); > > // compute starting address > @@ -207,7 +236,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre > return -1; > } > } else { > - if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) { > + if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) { > fprintf(stderr, "[ERROR]: Cannot read from VRAM\n"); > return -1; > } > @@ -244,6 +273,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre > } pte_fields; > char buf[64]; > unsigned char *pdst = dst; > + char *hub; > > /* > * PTE format on AI: > @@ -266,21 +296,28 @@ static int umr_read_vram_ai(struct umr_asic *asic, 
uint32_t vmid, uint64_t addre > * 0 valid > */ > > + if ((vmid & 0xFF00) == UMR_MM_HUB) > + hub = "mmhub"; > + else > + hub = "gfx"; > + > + vmid &= 0xFF; > + > // read vm registers > sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid); > - page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12; > + page_table_start_addr = (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 12; > sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid); > - page_table_start_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 44; > + page_table_start_addr |= (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 44; > > sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid); > - tmp = umr_read_reg_by_name(asic, buf); > + tmp = umr_read_reg_by_name_by_ip(asic, hub, buf); > page_table_depth = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp); > page_table_size = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp); > > sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid); > - page_table_base_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 0; > + page_table_base_addr = (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 0; > sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid); > - page_table_base_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 32; > + page_table_base_addr |= (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 32; > > DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address); > DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr); > @@ -288,6 +325,15 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre > DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_BLOCK_SIZE = %lu\n", page_table_size); > DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth); > > + if (asic->options.verbose) > + fprintf(stderr, "[VERBOSE]: Decoding %u at 
0x%llx\nPAGE_TABLE_START_ADDR=0x%llx\nPAGE_TABLE_BASE_ADDR=0x%llx\nPAGE_TABLE_BLOCK_SIZE=%u\nPAGE_TABLE_DEPTH=%u\n", > + (unsigned)vmid, > + (unsigned long long)address, > + (unsigned long long)page_table_start_addr, > + (unsigned long long)page_table_base_addr, > + (unsigned)page_table_size, > + (unsigned)page_table_depth); > + > address -= page_table_start_addr; > > // update addresses for APUs > @@ -336,7 +382,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre > DEBUG("selector mask == %llx\n", ((unsigned long long)511 << ((page_table_depth-1)*9 + (12 + 9 + page_table_size)))); > > // read PDE entry > - if (umr_read_vram(asic, 0xFFFF, pde_address + pde_idx * 8, 8, &pde_entry) < 0) > + if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_address + pde_idx * 8, 8, &pde_entry) < 0) > return -1; > > // decode PDE values > @@ -349,6 +395,13 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre > DEBUG("PDE==%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d, system=%d, cache=%d, pte=%d\n", > (unsigned long long)pde_entry, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, > (int)pde_fields.valid, (int)pde_fields.system, (int)pde_fields.cache, (int)pde_fields.pte); > + if (asic->options.verbose) > + fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d, PDE.system=%d, PDE.cache=%d, PDE.pte=%d\n", > + (unsigned long long)pde_fields.pte_base_addr, > + (int)pde_fields.valid, > + (int)pde_fields.system, > + (int)pde_fields.cache, > + (int)pde_fields.pte); > > if (!pde_fields.system) > pde_fields.pte_base_addr -= vm_fb_offset; > @@ -360,7 +413,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre > } > > // now read PTE entry for this page > - if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0) > + if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0) > return -1; > 
> // decode PTE values > @@ -371,6 +424,11 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre > DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", > (unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, > (int)pte_fields.system, (int)pte_fields.valid); > + if (asic->options.verbose) > + fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n", > + (unsigned long long)pte_fields.page_base_addr, > + (int)pte_fields.system, > + (int)pte_fields.valid); > > if (!pte_fields.system) > pte_fields.page_base_addr -= vm_fb_offset; > @@ -390,11 +448,16 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre > DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, system=%d, valid=%d\n", > (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, > (int)pde_fields.system, (int)pde_fields.valid); > + if (asic->options.verbose) > + fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d, PDE.system=%d\n", > + (unsigned long long)pde_fields.pte_base_addr, > + (int)pde_fields.valid, > + (int)pde_fields.system); > > // PTE addr = baseaddr[47:6] + (logical - start) >> fragsize) > pte_idx = (address >> (12 + pde_fields.frag_size)); > > - if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0) > + if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0) > return -1; > > // decode PTE values > @@ -405,6 +468,11 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre > DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", > (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, > (int)pte_fields.system, (int)pte_fields.valid); > + 
if (asic->options.verbose) > + fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n", > + (unsigned long long)pte_fields.page_base_addr, > + (int)pte_fields.system, > + (int)pte_fields.valid); > > > // compute starting address > @@ -427,7 +495,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre > return -1; > } > } else { > - if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) { > + if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) { > fprintf(stderr, "[ERROR]: Cannot read from VRAM\n"); > return -1; > } > @@ -451,7 +519,7 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32 > return -1; > } > > - if (vmid == 0xFFFF) { > + if ((vmid & 0xFF00) == UMR_LINEAR_HUB) { > DEBUG("Reading physical VRAM addr: 0x%llx\n", (unsigned long long)address); > // addressing is physical > if (asic->options.use_pci == 0) { > diff --git a/src/lib/ring_decode.c b/src/lib/ring_decode.c > index 35e72ed58e4f..772ea49dda6f 100644 > --- a/src/lib/ring_decode.c > +++ b/src/lib/ring_decode.c > @@ -589,7 +589,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec > // detect VCN/UVD IBs and chain them once all > // 4 pieces of information are found > if (!strcmp(name, "mmUVD_LMI_RBC_IB_VMID")) { > - decoder->pm4.next_ib_state.ib_vmid = ib; > + decoder->pm4.next_ib_state.ib_vmid = ib | ((asic->family <= FAMILY_VI) ? 
0 : UMR_MM_HUB); > decoder->pm4.next_ib_state.tally |= 1; > } else if (!strcmp(name, "mmUVD_LMI_RBC_IB_64BIT_BAR_LOW")) { > decoder->pm4.next_ib_state.ib_addr_lo = ib; > @@ -598,7 +598,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec > decoder->pm4.next_ib_state.ib_addr_hi = ib; > decoder->pm4.next_ib_state.tally |= 4; > } else if (!strcmp(name, "mmUVD_RBC_IB_SIZE")) { > - decoder->pm4.next_ib_state.ib_size = ib; > + decoder->pm4.next_ib_state.ib_size = ib * 4; > decoder->pm4.next_ib_state.tally |= 8; > } > > diff --git a/src/lib/sq_cmd_halt_waves.c b/src/lib/sq_cmd_halt_waves.c > new file mode 100644 > index 000000000000..83aa52d2cfb2 > --- /dev/null > +++ b/src/lib/sq_cmd_halt_waves.c > @@ -0,0 +1,57 @@ > +/* > + * Copyright 2017 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. 
> + * > + * Authors: Tom St Denis <tom.stdenis at amd.com> > + * > + */ > +#include "umr.h" > + > +int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode) > +{ > + struct umr_reg *reg; > + uint32_t value; > + uint64_t addr; > + > + reg = umr_find_reg_data(asic, "SQ_CMD"); > + if (!reg) { > + fprintf(stderr, "[BUG]: Cannot find SQ_CMD register in umr_sq_cmd_halt_waves()\n"); > + return -1; > + } > + > + // compose value > + if (asic->family == FAMILY_CIK) { > + value = umr_bitslice_compose_value(asic, reg, "CMD", mode == UMR_SQ_CMD_HALT ? 1 : 2); // SETHALT > + } else { > + value = umr_bitslice_compose_value(asic, reg, "CMD", 1); // SETHALT > + value |= umr_bitslice_compose_value(asic, reg, "DATA", mode == UMR_SQ_CMD_HALT ? 1 : 0); > + } > + value |= umr_bitslice_compose_value(asic, reg, "MODE", 1); // BROADCAST > + > + // compose address > + addr = reg->addr * 4; > + addr |= (1ULL << 62) | // we need to take the lock so we can ensure a broadcast write > + (0x3FFULL << 24) | > + (0x3FFULL << 34) | > + (0x3FFULL << 44); > + umr_write_reg(asic, addr, value, reg->type); > + > + return 0; > +} > diff --git a/src/umr.h b/src/umr.h > index a0e94a7e4db9..dd7f80c38f0c 100644 > --- a/src/umr.h > +++ b/src/umr.h > @@ -33,6 +33,20 @@ > #include <pciaccess.h> > #include <pthread.h> > > +/* SQ_CMD halt/resume */ > +enum umr_sq_cmd_halt_resume { > + UMR_SQ_CMD_HALT=0, > + UMR_SQ_CMD_RESUME, > +}; > + > +/* memory space hubs */ > +enum umr_hub_space { > + UMR_GFX_HUB = 0 << 8, // default on everything before AI > + UMR_MM_HUB = 1 << 8, // available on AI and later > + > + UMR_LINEAR_HUB = 0xFF << 8, // this is for linear access to vram > +}; > + > /* sourced from amd_powerplay.h from the kernel */ > enum amd_pp_sensors { > AMDGPU_PP_SENSOR_GFX_SCLK = 0, > @@ -174,6 +188,8 @@ struct umr_options { > read_smc, > quiet, > follow_ib, > + verbose, > + halt_waves, > no_kernel; > unsigned > instance_bank, > @@ -477,6 +493,7 @@ int umr_create_mmio_accel(struct 
umr_asic *asic); > uint32_t umr_find_reg(struct umr_asic *asic, char *regname); > > // find the register data for a register > +struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname); > struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname); > > // read/write a 32-bit register given a BYTE address > @@ -487,17 +504,26 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg > uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name); > int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value); > > +// read/write a register by ip/name > +uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name); > +int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value); > + > // slice a full register into bits (shifted into LSB) > uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue); > uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue); > +uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue); > > // compose a 32-bit register with a value and a bitfield > uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue); > uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *reg, char *bitname, uint32_t regvalue); > +uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue); > > // select a GRBM_GFX_IDX > int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance); > > +// halt/resume SQ waves > +int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode); > + > /* IB/ring decoding/dumping/etc */ > void umr_print_decode(struct umr_asic *asic, struct 
umr_ring_decoder *decoder, uint32_t ib); > void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder); > -- > 2.12.0 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx