Tested with VMID0 decodings just fine.  Haven't tried VMID1-15 yet.

Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
---
 src/lib/read_vram.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 194 insertions(+), 4 deletions(-)

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index e2087a252c10..4c74f4521857 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t size, void *dst)
 	return -1;
 }
 
-
 static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst)
 {
 	uint64_t start_addr, page_table_start_addr, page_table_base_addr,
@@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 		if (page_table_depth == 1) {
 			// decode addr into pte and pde selectors...
 			pde_idx = (address >> (12 + 9 + page_table_size)) & ((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
-			pte_idx = (address >> 12) & ((1ULL << (9 + page_table_size)) - 1);
+			pte_idx = (address >> (12 + page_table_size - 4)) & ((1ULL << (9 + page_table_size)) - 1);
 
 			// read PDE entry
 			umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry);
@@ -210,6 +209,190 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 	return 0;
 }
 
+/*
+ * umr_read_vram_ai() - read from a GPU virtual address on AI family ASICs.
+ *
+ * @asic:    ASIC to read from
+ * @vmid:    selects the mmVM_CONTEXT<vmid>_* registers describing the page table
+ * @address: virtual address to start reading at
+ * @size:    number of bytes to read
+ * @dst:     destination buffer, must have room for @size bytes
+ *
+ * Translates at most one 4KB page per iteration: the PDE/PTE entries are
+ * fetched with umr_read_vram() using vmid 0xFFFF and the data is then read
+ * from system memory or VRAM depending on the "system" bit of the PTE.
+ *
+ * Returns 0 on success, -1 if a page table entry or the data cannot be read.
+ *
+ * NOTE(review): for PAGE_TABLE_DEPTH > 1 every PDE is fetched relative to the
+ * root table and the index of each lower level is derived from the address
+ * stored in the previous PDE.  That is kept as submitted but looks suspicious;
+ * confirm against the hardware documentation before relying on multi-level walks.
+ */
+static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst)
+{
+	// bits 47:12 of a PDE/PTE hold the 4KB aligned address the entry points at
+	const uint64_t entry_addr_mask = 0xFFFFFFFFF000ULL;
+	uint64_t start_addr, page_table_start_addr, page_table_base_addr,
+		 page_table_size, pde_table_addr, pte_idx, pde_idx,
+		 pte_entry, pde_entry, pde_address;
+	uint32_t chunk_size, tmp;
+	int page_table_depth, pde_cnt, first;
+	struct {
+		uint64_t
+			frag_size,
+			pte_base_addr,
+			valid;
+	} pde_fields;
+	struct {
+		uint64_t
+			page_base_addr,
+			fragment,
+			system,
+			valid;
+	} pte_fields;
+	char buf[64];
+	unsigned char *pdst = dst;
+
+	/*
+	 * PTE fields as decoded below (the bits not decoded here were listed
+	 * for VI and are unused in this function -- TODO confirm them for AI):
+	 * 47:12 4k physical page base address
+	 * 11:7 fragment
+	 * 6 write
+	 * 5 read
+	 * 4 exe
+	 * 3 reserved
+	 * 2 snooped
+	 * 1 system
+	 * 0 valid
+	 *
+	 * PDE fields as decoded below:
+	 * 63:59 block fragment size
+	 * 47:12 physical base address of PTE
+	 * 0 valid
+	 */
+
+	// read vm registers
+	snprintf(buf, sizeof(buf), "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
+	page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+	snprintf(buf, sizeof(buf), "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
+	page_table_start_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
+
+	snprintf(buf, sizeof(buf), "mmVM_CONTEXT%d_CNTL", (int)vmid);
+	tmp = umr_read_reg_by_name(asic, buf);
+	page_table_depth = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp);
+	page_table_size = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp);
+
+	snprintf(buf, sizeof(buf), "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
+	page_table_base_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 0;
+	snprintf(buf, sizeof(buf), "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
+	page_table_base_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
+
+	DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
+	DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
+	DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr);
+	DEBUG("BASE_SIZE = %llu\n", (unsigned long long)page_table_size);
+	DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth);
+
+	address -= page_table_start_addr;
+
+	// AI+ allows 0=default (4KB) whereas VI requires it to be explicitly set to >=4
+	page_table_size = page_table_size ? page_table_size : 4;
+
+	// with PDEs in use page_table_base_addr is a page address rather than a PDE
+	// entry; shift it once here, doing so per chunk would compound the shift
+	pde_table_addr = page_table_base_addr << 12;
+
+	while (size) {
+		if (page_table_depth >= 1) {
+			pte_idx = (address >> (12 + page_table_size - 4)) & ((1ULL << (9 + page_table_size)) - 1);
+
+			// AI+ supports more than 1 level of PDEs so we iterate for all of the depths;
+			// the walk state is reset for every chunk so later pages decode like the first
+			pde_address = address;
+			pde_cnt = page_table_depth;
+			first = 1;
+			while (pde_cnt) {
+				// decode addr into pte and pde selectors...
+				pde_idx = (pde_address >> (pde_cnt*9 + (12 + page_table_size - 4)));
+
+				// don't mask the first PDE idx
+				if (!first)
+					pde_idx &= (1ULL << 9) - 1;
+				first = 0;
+
+				// read PDE entry
+				if (umr_read_vram(asic, 0xFFFF, pde_table_addr + pde_idx * 8, 8, &pde_entry) < 0) {
+					fprintf(stderr, "[ERROR] Cannot read PDE entry\n");
+					return -1;
+				}
+
+				// decode PDE values
+				pde_fields.frag_size = (pde_entry >> 59) & 0x1F;
+				pde_fields.pte_base_addr = pde_entry & entry_addr_mask;
+				pde_fields.valid = pde_entry & 1;
+				DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+				// for the next round the address we're decoding is the phys address in the currently decoded PDE
+				--pde_cnt;
+				pde_address = pde_fields.pte_base_addr;
+			}
+		} else {
+			// in AI+ the BASE_ADDR is treated like a PDE entry...
+			// decode PDE values
+			pde_idx = 0; // unused
+			pde_fields.frag_size = (page_table_base_addr >> 59) & 0x1F;
+			pde_fields.pte_base_addr = page_table_base_addr & entry_addr_mask;
+			pde_fields.valid = page_table_base_addr & 1;
+			DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+			// PTE addr = baseaddr[47:6] + (logical - start) >> fragsize)
+			pte_idx = (address >> (12 + pde_fields.frag_size));
+		}
+
+		// now read PTE entry for this page
+		if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0) {
+			fprintf(stderr, "[ERROR] Cannot read PTE entry\n");
+			return -1;
+		}
+
+		// decode PTE values (same mask on both paths)
+		pte_fields.page_base_addr = pte_entry & entry_addr_mask;
+		pte_fields.fragment = (pte_entry >> 7) & 0x1F;
+		pte_fields.system = (pte_entry >> 1) & 1;
+		pte_fields.valid = pte_entry & 1;
+		DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
+
+		// compute starting address
+		start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+
+		// read up to 4K from it
+		// TODO: Support page sizes >4KB
+		if (((start_addr & 0xFFF) + size) & ~0xFFF) {
+			chunk_size = 0x1000 - (start_addr & 0xFFF);
+		} else {
+			chunk_size = size;
+		}
+		DEBUG("Computed address we will read from: %s:%llx (reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram", (unsigned long long)start_addr, (unsigned long)chunk_size);
+		if (pte_fields.system) {
+			if (umr_read_sram(start_addr, chunk_size, pdst) < 0) {
+				fprintf(stderr, "[ERROR] Cannot read system ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
+				fprintf(stderr, "[ERROR] Alternatively download and install /dev/fmem\n");
+				return -1;
+			}
+		} else {
+			if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
+				fprintf(stderr, "[ERROR] Cannot read from VRAM\n");
+				return -1;
+			}
+		}
+		pdst += chunk_size;
+		size -= chunk_size;
+		address += chunk_size;
+	}
+	return 0;
+}
 
 int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst)
 {
@@ -234,8 +417,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32
 		return 0;
 	}
 
-	if (asic->family == FAMILY_VI)
-		return umr_read_vram_vi(asic, vmid, address, size, dst);
+	switch (asic->family) {
+		case FAMILY_VI:
+			return umr_read_vram_vi(asic, vmid, address, size, dst);
+		case FAMILY_AI:
+			return umr_read_vram_ai(asic, vmid, address, size, dst);
+		default:
+			fprintf(stderr, "[BUG] Unsupported ASIC family type for umr_read_vram()\n");
+			return -1;
+	}
 
 	return 0;
 }
-- 
2.12.0