[PATCH umr 4/4] Add ability to halt waves and better VM decoding

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jul 24, 2017 at 11:25 AM, Tom St Denis <tom.stdenis at amd.com> wrote:
> This patch involves two things I was working on at once, so the
> changes are a bit intertwined.  It adds:
>
> 1.  The ability to halt SQ waves when reading waves on CIK and later
> ASICs.
>
> 2.  The ability to enable verbose decoding when reading VRAM.
>
> 3.  The ability to decode virtual addresses in the mmhub (for, say, VCN).
>
> Signed-off-by: Tom St Denis <tom.stdenis at amd.com>

Series is:
Acked-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  doc/umr.1                   |  6 +++
>  src/app/main.c              | 15 +++++--
>  src/app/print_waves.c       |  6 +++
>  src/lib/CMakeLists.txt      |  1 +
>  src/lib/dump_ib.c           |  6 ++-
>  src/lib/find_reg.c          | 11 ++++-
>  src/lib/mmio.c              | 36 +++++++++++++----
>  src/lib/read_vram.c         | 98 ++++++++++++++++++++++++++++++++++++++-------
>  src/lib/ring_decode.c       |  4 +-
>  src/lib/sq_cmd_halt_waves.c | 57 ++++++++++++++++++++++++++
>  src/umr.h                   | 26 ++++++++++++
>  11 files changed, 233 insertions(+), 33 deletions(-)
>  create mode 100644 src/lib/sq_cmd_halt_waves.c
>
> diff --git a/doc/umr.1 b/doc/umr.1
> index 4c720ba48840..da432a13b0d1 100644
> --- a/doc/umr.1
> +++ b/doc/umr.1
> @@ -148,6 +148,12 @@ separated strings.  Options should be specified before --update or --force comma
>       be used only if the KMD is hung or otherwise not working correctly.  Using it on live systems
>       may result in race conditions.
>
> +.B verbose
> +     Enable verbose diagnostics (used in --vram).
> +
> +.B halt_waves
> +     Halt/resume all waves while reading wave status.
> +
>  .SH "Notes"
>
>  - The "Waves" field in the DRM section of --top only works if GFX PG has been disabled.  Otherwise,
> diff --git a/src/app/main.c b/src/app/main.c
> index 6e0bc57200b0..7e3914155a22 100644
> --- a/src/app/main.c
> +++ b/src/app/main.c
> @@ -107,6 +107,10 @@ static void parse_options(char *str)
>                         options.quiet = 1;
>                 } else if (!strcmp(option, "follow_ib")) {
>                         options.follow_ib = 1;
> +               } else if (!strcmp(option, "verbose")) {
> +                       options.verbose = 1;
> +               } else if (!strcmp(option, "halt_waves")) {
> +                       options.halt_waves = 1;
>                 } else if (!strcmp(option, "no_kernel")) {
>                         options.no_kernel = 1;
>                         options.use_pci = 1;
> @@ -422,12 +426,15 @@ int main(int argc, char **argv)
>  "\n\t--top, -t\n\t\tSummarize GPU utilization.  Can select a SE block with --bank.  Can use"
>         "\n\t\toptions 'use_colour' to colourize output and 'use_pci' to improve efficiency.\n"
>  "\n\t--waves, -wa\n\t\tPrint out information about any active CU waves.  Can use '-O bits'"
> -       "\n\t\tto see decoding of various wave fields.\n"
> +       "\n\t\tto see decoding of various wave fields.  Can use the '-O halt_waves' option"
> +       "\n\t\tto halt the SQ while reading registers.\n"
>  "\n\t--vram, -v [<vmid>@]<address> <size>"
>         "\n\t\tRead 'size' bytes (in hex) from a given address (in hex) to stdout. Optionally"
> -       "\n\t\tspecify the VMID (in decimal) treating the address as a virtual address instead.\n"
> -"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: risky, bits, bitsfull, empty_log, follow, named, many,"
> -       "\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel.\n"
> +       "\n\t\tspecify the VMID (in decimal or in hex with a '0x' prefix) treating the address"
> +       "\n\t\tas a virtual address instead.  Can use 'verbose' option to print out PDE/PTE"
> +       "\n\t\tdecodings.\n"
> +"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: bits, bitsfull, empty_log, follow, named, many,"
> +       "\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel, verbose, halt_waves.\n"
>  "\n\n", UMR_BUILD_VER, UMR_BUILD_REV);
>                         exit(EXIT_SUCCESS);
>                 } else {
> diff --git a/src/app/print_waves.c b/src/app/print_waves.c
> index e157db9f9386..1efd8a13bd28 100644
> --- a/src/app/print_waves.c
> +++ b/src/app/print_waves.c
> @@ -40,6 +40,9 @@ void umr_print_waves(struct umr_asic *asic)
>         struct umr_wave_status ws;
>         int first = 1, col = 0;
>
> +       if (asic->options.halt_waves)
> +               umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
> +
>         if (asic->family <= FAMILY_CIK)
>                 shift = 3;  // on SI..CIK allocations were done in 8-dword blocks
>         else
> @@ -206,4 +209,7 @@ void umr_print_waves(struct umr_asic *asic)
>         }
>         if (first)
>                 printf("No active waves!\n");
> +
> +       if (asic->options.halt_waves)
> +               umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
>  }
> diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
> index 217ae80cdfd7..fcb4f9c9dc80 100644
> --- a/src/lib/CMakeLists.txt
> +++ b/src/lib/CMakeLists.txt
> @@ -21,6 +21,7 @@ add_library(umrcore STATIC
>    read_vram.c
>    ring_decode.c
>    scan_config.c
> +  sq_cmd_halt_waves.c
>    transfer_soc15.c
>    wave_status.c
>    update.c
> diff --git a/src/lib/dump_ib.c b/src/lib/dump_ib.c
> index 4e81dbe3eb09..cba497373fe2 100644
> --- a/src/lib/dump_ib.c
> +++ b/src/lib/dump_ib.c
> @@ -28,9 +28,11 @@
>  void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder)
>  {
>         uint32_t *data = NULL, x;
> +       static const char *hubs[] = { "gfxhub", "mmhub" };
>
> -       printf("Dumping IB at VMID:%u 0x%llx of %u words\n",
> -               (unsigned)decoder->next_ib_info.vmid,
> +       printf("Dumping IB at (%s) VMID:%u 0x%llx of %u words\n",
> +               hubs[decoder->next_ib_info.vmid >> 8],
> +               (unsigned)decoder->next_ib_info.vmid & 0xFF,
>                 (unsigned long long)decoder->next_ib_info.ib_addr,
>                 (unsigned)decoder->next_ib_info.size/4);
>
> diff --git a/src/lib/find_reg.c b/src/lib/find_reg.c
> index d4647163ea63..ecd7f132c9c9 100644
> --- a/src/lib/find_reg.c
> +++ b/src/lib/find_reg.c
> @@ -36,14 +36,21 @@ uint32_t umr_find_reg(struct umr_asic *asic, char *regname)
>         return 0xFFFFFFFF;
>  }
>
> -struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname)
> +struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname)
>  {
>         int i, j;
>
> -       for (i = 0; i < asic->no_blocks; i++)
> +       for (i = 0; i < asic->no_blocks; i++) {
> +               if (ip && memcmp(asic->blocks[i]->ipname, ip, strlen(ip))) continue;
>                 for (j = 0; j < asic->blocks[i]->no_regs; j++)
>                         if (!strcmp(asic->blocks[i]->regs[j].regname, regname))
>                                 return &asic->blocks[i]->regs[j];
> +       }
>         fprintf(stderr, "[BUG]: reg [%s] not found on asic [%s]\n", regname, asic->asicname);
>         return NULL;
>  }
> +
> +struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname)
> +{
> +       return umr_find_reg_data_by_ip(asic, NULL, regname);
> +}
> diff --git a/src/lib/mmio.c b/src/lib/mmio.c
> index 47e5150d3201..eb91e289404f 100644
> --- a/src/lib/mmio.c
> +++ b/src/lib/mmio.c
> @@ -145,26 +145,36 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg
>         return 0;
>  }
>
> -uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name)
> +uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name)
>  {
>         struct umr_reg *reg;
> -       reg = umr_find_reg_data(asic, name);
> +       reg = umr_find_reg_data_by_ip(asic, ip, name);
>         if (reg)
>                 return umr_read_reg(asic, reg->addr * (reg->type == REG_MMIO ? 4 : 1), reg->type);
>         else
>                 return 0;
>  }
>
> -int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value)
> +uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name)
> +{
> +       return umr_read_reg_by_name_by_ip(asic, NULL, name);
> +}
> +
> +int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value)
>  {
>         struct umr_reg *reg;
> -       reg = umr_find_reg_data(asic, name);
> +       reg = umr_find_reg_data_by_ip(asic, ip, name);
>         if (reg)
>                 return umr_write_reg(asic, reg->addr * (reg->type == REG_MMIO ? 4 : 1), value, reg->type);
>         else
>                 return -1;
>  }
>
> +int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value)
> +{
> +       return umr_write_reg_by_name_by_ip(asic, NULL, name, value);
> +}
> +
>  uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue)
>  {
>         int i;
> @@ -193,26 +203,36 @@ uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg,
>         return 0;
>  }
>
> -uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
> +uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue)
>  {
>         struct umr_reg *reg;
> -       reg = umr_find_reg_data(asic, regname);
> +       reg = umr_find_reg_data_by_ip(asic, ip, regname);
>         if (reg)
>                 return umr_bitslice_reg(asic, reg, bitname, regvalue);
>         else
>                 return 0;
>  }
>
> -uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
> +uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
> +{
> +       return umr_bitslice_reg_by_name_by_ip(asic, NULL, regname, bitname, regvalue);
> +}
> +
> +uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue)
>  {
>         struct umr_reg *reg;
> -       reg = umr_find_reg_data(asic, regname);
> +       reg = umr_find_reg_data_by_ip(asic, ip, regname);
>         if (reg)
>                 return umr_bitslice_compose_value(asic, reg, bitname, regvalue);
>         else
>                 return 0;
>  }
>
> +uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
> +{
> +       return umr_bitslice_compose_value_by_name_by_ip(asic, NULL, regname, bitname, regvalue);
> +}
> +
>  int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance)
>  {
>         struct umr_reg *grbm_idx;
> diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
> index 3d458db8fa11..b8034372b280 100644
> --- a/src/lib/read_vram.c
> +++ b/src/lib/read_vram.c
> @@ -135,7 +135,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>         sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR", (int)vmid);
>                 page_table_base_addr  = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
>
> -       vm_fb_base  = ((uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION") >> 16) << 24;
> +       vm_fb_base  = ((uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION") & 0xFFFF) << 24;
>
>         DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
>         DEBUG("mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr);
> @@ -143,6 +143,21 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>         DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth);
>         DEBUG("mmMC_VM_FB_LOCATION == %llx\n", (unsigned long long)vm_fb_base);
>
> +       if (asic->options.verbose)
> +               fprintf(stderr, "[VERBOSE]: Decoding %u at 0x%llx\n"
> +                               "[VERBOSE]: PAGE_TABLE_START_ADDR=0x%llx\n"
> +                               "[VERBOSE]: PAGE_TABLE_BASE_ADDR=0x%llx\n"
> +                               "[VERBOSE]: PAGE_TABLE_BLOCK_SIZE=%u\n"
> +                               "[VERBOSE]: PAGE_TABLE_DEPTH=%u\n"
> +                               "[VERBOSE]: MC_VM_FB_LOCATION=0x%llx\n",
> +                       (unsigned)vmid,
> +                       (unsigned long long)address,
> +                       (unsigned long long)page_table_start_addr,
> +                       (unsigned long long)page_table_base_addr,
> +                       (unsigned)page_table_size,
> +                       (unsigned)page_table_depth,
> +                       (unsigned long long)vm_fb_base);
> +
>         address -= page_table_start_addr;
>
>         while (size) {
> @@ -152,17 +167,21 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                         pte_idx = (address >> 12) & ((1ULL << (9 + page_table_size)) - 1);
>
>                         // read PDE entry
> -                       umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry);
> +                       umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pde_idx * 8 - vm_fb_base, 8, &pde_entry);
>
>                         // decode PDE values
>                         pde_fields.frag_size     = (pde_entry >> 59) & 0x1F;
>                         pde_fields.pte_base_addr = pde_entry & 0xFFFFFFF000ULL;
>                         pde_fields.valid         = pde_entry & 1;
> +                       if (asic->options.verbose)
> +                               fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d\n",
> +                                               (unsigned long long)pde_fields.pte_base_addr,
> +                                               (int)pde_fields.valid);
>                         DEBUG("PDE==%llx, pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n",
>                                 (unsigned long long)pde_entry, (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
>
>                         // now read PTE entry for this page
> -                       if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
> +                       if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8 - vm_fb_base, 8, &pte_entry) < 0)
>                                 return -1;
>
>                         // decode PTE values
> @@ -170,6 +189,11 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                         pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
>                         pte_fields.system         = (pte_entry >> 1) & 1;
>                         pte_fields.valid          = pte_entry & 1;
> +                       if (asic->options.verbose)
> +                               fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
> +                                       (unsigned long long)pte_fields.page_base_addr,
> +                                       (int)pte_fields.system,
> +                                       (int)pte_fields.valid);
>                         DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
>                                 (unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
>
> @@ -179,7 +203,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                         // depth == 0 == PTE only
>                         pte_idx = (address >> 12);
>
> -                       if (umr_read_vram(asic, 0xFFFF, page_table_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
> +                       if (umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pte_idx * 8 - vm_fb_base, 8, &pte_entry) < 0)
>                                 return -1;
>
>                         // decode PTE values
> @@ -187,6 +211,11 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                         pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
>                         pte_fields.system         = (pte_entry >> 1) & 1;
>                         pte_fields.valid          = pte_entry & 1;
> +                       if (asic->options.verbose)
> +                               fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
> +                                       (unsigned long long)pte_fields.page_base_addr,
> +                                       (int)pte_fields.system,
> +                                       (int)pte_fields.valid);
>                         DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
>
>                         // compute starting address
> @@ -207,7 +236,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                                 return -1;
>                         }
>                 } else {
> -                       if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
> +                       if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) {
>                                 fprintf(stderr, "[ERROR]: Cannot read from VRAM\n");
>                                 return -1;
>                         }
> @@ -244,6 +273,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>         } pte_fields;
>         char buf[64];
>         unsigned char *pdst = dst;
> +       char *hub;
>
>         /*
>          * PTE format on AI:
> @@ -266,21 +296,28 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>          * 0 valid
>          */
>
> +       if ((vmid & 0xFF00) == UMR_MM_HUB)
> +               hub = "mmhub";
> +       else
> +               hub = "gfx";
> +
> +       vmid &= 0xFF;
> +
>         // read vm registers
>         sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
> -               page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
> +               page_table_start_addr = (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 12;
>         sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
> -               page_table_start_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
> +               page_table_start_addr |= (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 44;
>
>         sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
> -               tmp = umr_read_reg_by_name(asic, buf);
> +               tmp = umr_read_reg_by_name_by_ip(asic, hub, buf);
>                 page_table_depth      = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp);
>                 page_table_size       = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp);
>
>         sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
> -               page_table_base_addr  = (uint64_t)umr_read_reg_by_name(asic, buf) << 0;
> +               page_table_base_addr  = (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 0;
>         sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
> -               page_table_base_addr  |= (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
> +               page_table_base_addr  |= (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 32;
>
>         DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
>         DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
> @@ -288,6 +325,15 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>         DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_BLOCK_SIZE = %lu\n", page_table_size);
>         DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth);
>
> +       if (asic->options.verbose)
> +               fprintf(stderr, "[VERBOSE]: Decoding %u at 0x%llx\nPAGE_TABLE_START_ADDR=0x%llx\nPAGE_TABLE_BASE_ADDR=0x%llx\nPAGE_TABLE_BLOCK_SIZE=%u\nPAGE_TABLE_DEPTH=%u\n",
> +                       (unsigned)vmid,
> +                       (unsigned long long)address,
> +                       (unsigned long long)page_table_start_addr,
> +                       (unsigned long long)page_table_base_addr,
> +                       (unsigned)page_table_size,
> +                       (unsigned)page_table_depth);
> +
>         address -= page_table_start_addr;
>
>         // update addresses for APUs
> @@ -336,7 +382,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                                 DEBUG("selector mask == %llx\n", ((unsigned long long)511 << ((page_table_depth-1)*9 + (12 + 9 + page_table_size))));
>
>                                 // read PDE entry
> -                               if (umr_read_vram(asic, 0xFFFF, pde_address + pde_idx * 8, 8, &pde_entry) < 0)
> +                               if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_address + pde_idx * 8, 8, &pde_entry) < 0)
>                                         return -1;
>
>                                 // decode PDE values
> @@ -349,6 +395,13 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                                 DEBUG("PDE==%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d, system=%d, cache=%d, pte=%d\n",
>                                         (unsigned long long)pde_entry, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr,
>                                         (int)pde_fields.valid, (int)pde_fields.system, (int)pde_fields.cache, (int)pde_fields.pte);
> +                               if (asic->options.verbose)
> +                                       fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d, PDE.system=%d, PDE.cache=%d, PDE.pte=%d\n",
> +                                                       (unsigned long long)pde_fields.pte_base_addr,
> +                                                       (int)pde_fields.valid,
> +                                                       (int)pde_fields.system,
> +                                                       (int)pde_fields.cache,
> +                                                       (int)pde_fields.pte);
>
>                                 if (!pde_fields.system)
>                                         pde_fields.pte_base_addr -= vm_fb_offset;
> @@ -360,7 +413,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                         }
>
>                         // now read PTE entry for this page
> -                       if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
> +                       if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
>                                 return -1;
>
>                         // decode PTE values
> @@ -371,6 +424,11 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                         DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
>                                 (unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment,
>                                 (int)pte_fields.system, (int)pte_fields.valid);
> +                       if (asic->options.verbose)
> +                               fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
> +                                       (unsigned long long)pte_fields.page_base_addr,
> +                                       (int)pte_fields.system,
> +                                       (int)pte_fields.valid);
>
>                         if (!pte_fields.system)
>                                 pte_fields.page_base_addr -= vm_fb_offset;
> @@ -390,11 +448,16 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                         DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, system=%d, valid=%d\n",
>                                 (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr,
>                                 (int)pde_fields.system, (int)pde_fields.valid);
> +                               if (asic->options.verbose)
> +                                       fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d, PDE.system=%d\n",
> +                                                       (unsigned long long)pde_fields.pte_base_addr,
> +                                                       (int)pde_fields.valid,
> +                                                       (int)pde_fields.system);
>
>                         // PTE addr = baseaddr[47:6] + (logical - start) >> fragsize)
>                         pte_idx = (address >> (12 + pde_fields.frag_size));
>
> -                       if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
> +                       if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
>                                 return -1;
>
>                         // decode PTE values
> @@ -405,6 +468,11 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                         DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
>                                 (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment,
>                                 (int)pte_fields.system, (int)pte_fields.valid);
> +                       if (asic->options.verbose)
> +                               fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
> +                                       (unsigned long long)pte_fields.page_base_addr,
> +                                       (int)pte_fields.system,
> +                                       (int)pte_fields.valid);
>
>
>                         // compute starting address
> @@ -427,7 +495,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
>                                 return -1;
>                         }
>                 } else {
> -                       if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
> +                       if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) {
>                                 fprintf(stderr, "[ERROR]: Cannot read from VRAM\n");
>                                 return -1;
>                         }
> @@ -451,7 +519,7 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32
>                 return -1;
>         }
>
> -       if (vmid == 0xFFFF) {
> +       if ((vmid & 0xFF00) == UMR_LINEAR_HUB) {
>                 DEBUG("Reading physical VRAM addr: 0x%llx\n", (unsigned long long)address);
>                 // addressing is physical
>                 if (asic->options.use_pci == 0) {
> diff --git a/src/lib/ring_decode.c b/src/lib/ring_decode.c
> index 35e72ed58e4f..772ea49dda6f 100644
> --- a/src/lib/ring_decode.c
> +++ b/src/lib/ring_decode.c
> @@ -589,7 +589,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec
>                         // detect VCN/UVD IBs and chain them once all
>                         // 4 pieces of information are found
>                         if (!strcmp(name, "mmUVD_LMI_RBC_IB_VMID")) {
> -                               decoder->pm4.next_ib_state.ib_vmid = ib;
> +                               decoder->pm4.next_ib_state.ib_vmid = ib | ((asic->family <= FAMILY_VI) ? 0 : UMR_MM_HUB);
>                                 decoder->pm4.next_ib_state.tally |= 1;
>                         } else if (!strcmp(name, "mmUVD_LMI_RBC_IB_64BIT_BAR_LOW")) {
>                                 decoder->pm4.next_ib_state.ib_addr_lo = ib;
> @@ -598,7 +598,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec
>                                 decoder->pm4.next_ib_state.ib_addr_hi = ib;
>                                 decoder->pm4.next_ib_state.tally |= 4;
>                         } else if (!strcmp(name, "mmUVD_RBC_IB_SIZE")) {
> -                               decoder->pm4.next_ib_state.ib_size = ib;
> +                               decoder->pm4.next_ib_state.ib_size = ib * 4;
>                                 decoder->pm4.next_ib_state.tally |= 8;
>                         }
>
> diff --git a/src/lib/sq_cmd_halt_waves.c b/src/lib/sq_cmd_halt_waves.c
> new file mode 100644
> index 000000000000..83aa52d2cfb2
> --- /dev/null
> +++ b/src/lib/sq_cmd_halt_waves.c
> @@ -0,0 +1,57 @@
> +/*
> + * Copyright 2017 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Tom St Denis <tom.stdenis at amd.com>
> + *
> + */
> +#include "umr.h"
> +
> +int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode)
> +{
> +       struct umr_reg *reg;
> +       uint32_t value;
> +       uint64_t addr;
> +
> +       reg = umr_find_reg_data(asic, "SQ_CMD");
> +       if (!reg) {
> +               fprintf(stderr, "[BUG]: Cannot find SQ_CMD register in umr_sq_cmd_halt_waves()\n");
> +               return -1;
> +       }
> +
> +       // compose value
> +       if (asic->family == FAMILY_CIK) {
> +               value = umr_bitslice_compose_value(asic, reg, "CMD", mode == UMR_SQ_CMD_HALT ? 1 : 2); // SETHALT
> +       } else {
> +               value = umr_bitslice_compose_value(asic, reg, "CMD", 1); // SETHALT
> +               value |= umr_bitslice_compose_value(asic, reg, "DATA", mode == UMR_SQ_CMD_HALT ? 1 : 0);
> +       }
> +       value |= umr_bitslice_compose_value(asic, reg, "MODE", 1); // BROADCAST
> +
> +       // compose address
> +       addr = reg->addr * 4;
> +       addr |= (1ULL << 62) |      // we need to take the lock so we can ensure a broadcast write
> +                       (0x3FFULL << 24) |
> +                       (0x3FFULL << 34) |
> +                       (0x3FFULL << 44);
> +       umr_write_reg(asic, addr, value, reg->type);
> +
> +       return 0;
> +}
> diff --git a/src/umr.h b/src/umr.h
> index a0e94a7e4db9..dd7f80c38f0c 100644
> --- a/src/umr.h
> +++ b/src/umr.h
> @@ -33,6 +33,20 @@
>  #include <pciaccess.h>
>  #include <pthread.h>
>
> +/* SQ_CMD halt/resume */
> +enum umr_sq_cmd_halt_resume {
> +       UMR_SQ_CMD_HALT=0,
> +       UMR_SQ_CMD_RESUME,
> +};
> +
> +/* memory space hubs */
> +enum umr_hub_space {
> +       UMR_GFX_HUB = 0 << 8,        // default on everything before AI
> +       UMR_MM_HUB = 1 << 8,         // available on AI and later
> +
> +       UMR_LINEAR_HUB = 0xFF << 8,  // this is for linear access to vram
> +};
> +
>  /* sourced from amd_powerplay.h from the kernel */
>  enum amd_pp_sensors {
>         AMDGPU_PP_SENSOR_GFX_SCLK = 0,
> @@ -174,6 +188,8 @@ struct umr_options {
>             read_smc,
>             quiet,
>             follow_ib,
> +           verbose,
> +           halt_waves,
>             no_kernel;
>         unsigned
>             instance_bank,
> @@ -477,6 +493,7 @@ int umr_create_mmio_accel(struct umr_asic *asic);
>  uint32_t umr_find_reg(struct umr_asic *asic, char *regname);
>
>  // find the register data for a register
> +struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname);
>  struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname);
>
>  // read/write a 32-bit register given a BYTE address
> @@ -487,17 +504,26 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg
>  uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name);
>  int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value);
>
> +// read/write a register by ip/name
> +uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name);
> +int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value);
> +
>  // slice a full register into bits (shifted into LSB)
>  uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue);
>  uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue);
> +uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue);
>
>  // compose a 32-bit register with a value and a bitfield
>  uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue);
>  uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *reg, char *bitname, uint32_t regvalue);
> +uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue);
>
>  // select a GRBM_GFX_IDX
>  int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance);
>
> +// halt/resume SQ waves
> +int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode);
> +
>  /* IB/ring decoding/dumping/etc */
>  void umr_print_decode(struct umr_asic *asic, struct umr_ring_decoder *decoder, uint32_t ib);
>  void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder);
> --
> 2.12.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux