On 06/04/17 02:11 PM, Deucher, Alexander wrote: >> -----Original Message----- >> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf >> Of Tom St Denis >> Sent: Thursday, April 06, 2017 1:53 PM >> To: amd-gfx at lists.freedesktop.org >> Cc: StDenis, Tom >> Subject: [PATCH umr] Add initial AI VM decoding >> >> Tested with VMID0 decodings just fine. Haven't tried VMID1-15 yet. >> >> Signed-off-by: Tom St Denis <tom.stdenis at amd.com> > > Looks reasonable: > Acked-by: Alex Deucher <alexander.deucher at amd.com> Thanks. What I'm leery about is the page_table_size. On VI it's always 4KB (with our kernel), so the logic isn't vetted much in umr. On top of that, it seems the AI+ interpretation is a bit different, with 0 == 4KB (instead of 4). I presume values greater than 0 are for multiples of 4KB (or multiples of 512 8-byte PDE/PTE entries), e.g. 1 = 8KB of next-level entries, etc. I'll try adding a stall to libdrm's GFX test again and see if the VMID >= 1 decoding works in the meantime. Cheers, Tom > >> --- >> src/lib/read_vram.c | 180 >> ++++++++++++++++++++++++++++++++++++++++++++++++++-- >> 1 file changed, 176 insertions(+), 4 deletions(-) >> >> diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c >> index e2087a252c10..4c74f4521857 100644 >> --- a/src/lib/read_vram.c >> +++ b/src/lib/read_vram.c >> @@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t >> size, void *dst) >> return -1; >> } >> >> - >> static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t >> address, uint32_t size, void *dst) >> { >> uint64_t start_addr, page_table_start_addr, page_table_base_addr, >> @@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, >> uint32_t vmid, uint64_t addre >> if (page_table_depth == 1) { >> // decode addr into pte and pde selectors...
>> pde_idx = (address >> (12 + 9 + page_table_size)) & >> ((1ULL << (40 - 12 - 9 - page_table_size)) - 1); >> - pte_idx = (address >> 12) & ((1ULL << (9 + >> page_table_size)) - 1); >> + pte_idx = (address >> (12 + page_table_size - 4)) & >> ((1ULL << (9 + page_table_size)) - 1); >> >> // read PDE entry >> umr_read_vram(asic, 0xFFFF, page_table_base_addr >> + pde_idx * 8, 8, &pde_entry); >> @@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic, >> uint32_t vmid, uint64_t addre >> return 0; >> } >> >> +static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t >> address, uint32_t size, void *dst) >> +{ >> + uint64_t start_addr, page_table_start_addr, page_table_base_addr, >> + page_table_size, pte_idx, pde_idx, pte_entry, pde_entry, >> + pde_address; >> + uint32_t chunk_size, tmp; >> + int page_table_depth, first; >> + struct { >> + uint64_t >> + frag_size, >> + pte_base_addr, >> + valid; >> + } pde_fields; >> + struct { >> + uint64_t >> + page_base_addr, >> + fragment, >> + system, >> + valid; >> + } pte_fields; >> + char buf[64]; >> + unsigned char *pdst = dst; >> + >> + /* >> + * PTE format on VI: >> + * 63:40 reserved >> + * 39:12 4k physical page base address >> + * 11:7 fragment >> + * 6 write >> + * 5 read >> + * 4 exe >> + * 3 reserved >> + * 2 snooped >> + * 1 system >> + * 0 valid >> + * >> + * PDE format on VI: >> + * 63:59 block fragment size >> + * 58:40 reserved >> + * 39:1 physical base address of PTE >> + * bits 5:1 must be 0. 
>> + * 0 valid >> + */ >> + >> + // read vm registers >> + sprintf(buf, >> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid); >> + page_table_start_addr = >> (uint64_t)umr_read_reg_by_name(asic, buf) << 12; >> + sprintf(buf, >> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid); >> + page_table_start_addr |= >> (uint64_t)umr_read_reg_by_name(asic, buf) << 44; >> + >> + sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid); >> + tmp = umr_read_reg_by_name(asic, buf); >> + page_table_depth = umr_bitslice_reg_by_name(asic, buf, >> "PAGE_TABLE_DEPTH", tmp); >> + page_table_size = umr_bitslice_reg_by_name(asic, buf, >> "PAGE_TABLE_BLOCK_SIZE", tmp); >> + >> + sprintf(buf, >> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid); >> + page_table_base_addr = >> (uint64_t)umr_read_reg_by_name(asic, buf) << 0; >> + sprintf(buf, >> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid); >> + page_table_base_addr |= >> (uint64_t)umr_read_reg_by_name(asic, buf) << 32; >> + >> + DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address); >> + DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long >> long)page_table_start_addr); >> + DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long >> long)page_table_base_addr); >> + DEBUG("BASE_SIZE = %lu\n", page_table_size); >> + DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth); >> + >> + address -= page_table_start_addr; >> + >> + // AI+ allows 0=default (4KB) whereas VI requires it to be explictly set >> to >=4 >> + page_table_size = page_table_size ? 
page_table_size : 4; >> + >> + first = 1; >> + while (size) { >> + if (page_table_depth >= 1) { >> + // page_table_base_addr is not a PDE entry in this >> config so shift it out (it's a page address) >> + page_table_base_addr <<= 12; >> + pte_idx = (address >> (12 + page_table_size - 4)) & >> ((1ULL << (9 + page_table_size)) - 1); >> + >> + // AI+ supports more than 1 level of PDEs so we >> iterate for all of the depths >> + pde_address = address; >> + while (page_table_depth) { >> + // decode addr into pte and pde selectors... >> + pde_idx = (pde_address >> >> (page_table_depth*9 + (12 + page_table_size - 4))); >> + >> + // don't mask the first PDE idx >> + if (!first) >> + pde_idx &= (1ULL << 9) - 1; >> + first = 0; >> + >> + // read PDE entry >> + umr_read_vram(asic, 0xFFFF, >> page_table_base_addr + pde_idx * 8, 8, &pde_entry); >> + >> + // decode PDE values >> + pde_fields.frag_size = (pde_entry >> 59) & >> 0x1F; >> + pde_fields.pte_base_addr = pde_entry & >> 0xFFFFFFFFF000ULL; >> + pde_fields.valid = pde_entry & 1; >> + DEBUG("pde_idx=%llx, frag_size=%u, >> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, >> (unsigned)pde_fields.frag_size, (unsigned long >> long)pde_fields.pte_base_addr, (int)pde_fields.valid); >> + >> + // for the next round the address we're >> decoding is the phys address in the currently decoded PDE >> + --page_table_depth; >> + pde_address = pde_fields.pte_base_addr; >> + } >> + >> + // now read PTE entry for this page >> + umr_read_vram(asic, 0xFFFF, >> pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry); >> + >> + // decode PTE values >> + pte_fields.page_base_addr = pte_entry & >> 0xFFFFFFFFF000ULL; >> + pte_fields.fragment = (pte_entry >> 7) & 0x1F; >> + pte_fields.system = (pte_entry >> 1) & 1; >> + pte_fields.valid = pte_entry & 1; >> + DEBUG("pte_idx=%llx, page_base_addr=0x%llx, >> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, >> (unsigned long long)pte_fields.page_base_addr, >> 
(unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid); >> + >> + // compute starting address >> + start_addr = pte_fields.page_base_addr + (address >> & 0xFFF); >> + } else { >> + // in AI+ the BASE_ADDR is treated like a PDE entry... >> + // decode PDE values >> + pde_idx = 0; // unused >> + pde_fields.frag_size = (page_table_base_addr >> >> 59) & 0x1F; >> + pde_fields.pte_base_addr = page_table_base_addr >> & 0xFFFFFFFFF000ULL; >> + pde_fields.valid = page_table_base_addr & 1; >> + DEBUG("pde_idx=%llx, frag_size=%u, >> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, >> (unsigned)pde_fields.frag_size, (unsigned long >> long)pde_fields.pte_base_addr, (int)pde_fields.valid); >> + >> + // PTE addr = baseaddr[47:6] + (logical - start) >> >> fragsize) >> + pte_idx = (address >> (12 + pde_fields.frag_size)); >> + >> + umr_read_vram(asic, 0xFFFF, >> pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry); >> + >> + // decode PTE values >> + pte_fields.page_base_addr = pte_entry & >> 0xFFFFFFFF000ULL; >> + pte_fields.fragment = (pte_entry >> 7) & 0x1F; >> + pte_fields.system = (pte_entry >> 1) & 1; >> + pte_fields.valid = pte_entry & 1; >> + DEBUG("pte_idx=%llx, page_base_addr=0x%llx, >> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, >> (unsigned long long)pte_fields.page_base_addr, >> (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid); >> + >> + // compute starting address >> + start_addr = pte_fields.page_base_addr + (address >> & 0xFFF); >> + } >> + >> + // read upto 4K from it >> + // TODO: Support page sizes >4KB >> + if (((start_addr & 0xFFF) + size) & ~0xFFF) { >> + chunk_size = 0x1000 - (start_addr & 0xFFF); >> + } else { >> + chunk_size = size; >> + } >> + DEBUG("Computed address we will read from: %s:%llx >> (reading: %lu bytes)\n", pte_fields.system ? 
"sys" : "vram", (unsigned long >> long)start_addr, (unsigned long)chunk_size); >> + if (pte_fields.system) { >> + if (umr_read_sram(start_addr, chunk_size, pdst) < 0) >> { >> + fprintf(stderr, "[ERROR] Cannot read system >> ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n"); >> + fprintf(stderr, "[ERROR] Alternatively >> download and install /dev/fmem\n"); >> + return -1; >> + } >> + } else { >> + if (umr_read_vram(asic, 0xFFFF, start_addr, >> chunk_size, pdst) < 0) { >> + fprintf(stderr, "[ERROR] Cannot read from >> VRAM\n"); >> + return -1; >> + } >> + } >> + pdst += chunk_size; >> + size -= chunk_size; >> + address += chunk_size; >> + } >> + return 0; >> +} >> >> int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, >> uint32_t size, void *dst) >> { >> @@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t >> vmid, uint64_t address, uint32 >> return 0; >> } >> >> - if (asic->family == FAMILY_VI) >> - return umr_read_vram_vi(asic, vmid, address, size, dst); >> + switch (asic->family) { >> + case FAMILY_VI: >> + return umr_read_vram_vi(asic, vmid, address, size, >> dst); >> + case FAMILY_AI: >> + return umr_read_vram_ai(asic, vmid, address, size, >> dst); >> + default: >> + fprintf(stderr, "[BUG] Unsupported ASIC family type >> for umr_read_vram()\n"); >> + return -1; >> + } >> >> return 0; >> } >> -- >> 2.12.0 >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx