The more I think about it, I wonder if

    // AI+ allows 0=default (4KB) whereas VI requires it to be explictly set to >=4
    page_table_size = page_table_size ? page_table_size : 4;

should instead be

    page_table_size += 4;

since in the later code

    pde_idx = (pde_address >> (page_table_depth*9 + (12 + page_table_size - 4)));

it would result in a 9-bit page size if, say, page_table_size was 1, which should actually be 13 bits (8KB).

Tom

On 06/04/17 02:14 PM, Tom St Denis wrote: > On 06/04/17 02:11 PM, Deucher, Alexander wrote: >>> -----Original Message----- >>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf >>> Of Tom St Denis >>> Sent: Thursday, April 06, 2017 1:53 PM >>> To: amd-gfx at lists.freedesktop.org >>> Cc: StDenis, Tom >>> Subject: [PATCH umr] Add initial AI VM decoding >>> >>> Tested with VMID0 decodings just fine. Haven't tried VMID1-15 yet. >>> >>> Signed-off-by: Tom St Denis <tom.stdenis at amd.com> >> >> Looks reasonable: >> Acked-by: Alex Deucher <alexander.deucher at amd.com> > > Thanks. What I'm leery about is the page_table_size. On VI it's always > 4KB (with our kernel) so the logic isn't vetted much in umr. > > Then to add to it it seems AI+ interpretation is a bit different with > 0==4KB (instead of 4). I presume values greater than 0 are for > multiples of 4KB (or multiples of 512 8-byte PDE/PTE entries) e.g. 1 = > 8KB of next level entries, etc. > > I'll try adding a stall to libdrm's GFX test again and see if the VMID > >= 1 decoding works in the meantime. 
> > Cheers, > Tom > > > >> >>> --- >>> src/lib/read_vram.c | 180 >>> ++++++++++++++++++++++++++++++++++++++++++++++++++-- >>> 1 file changed, 176 insertions(+), 4 deletions(-) >>> >>> diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c >>> index e2087a252c10..4c74f4521857 100644 >>> --- a/src/lib/read_vram.c >>> +++ b/src/lib/read_vram.c >>> @@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t >>> size, void *dst) >>> return -1; >>> } >>> >>> - >>> static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, >>> uint64_t >>> address, uint32_t size, void *dst) >>> { >>> uint64_t start_addr, page_table_start_addr, page_table_base_addr, >>> @@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, >>> uint32_t vmid, uint64_t addre >>> if (page_table_depth == 1) { >>> // decode addr into pte and pde selectors... >>> pde_idx = (address >> (12 + 9 + >>> page_table_size)) & >>> ((1ULL << (40 - 12 - 9 - page_table_size)) - 1); >>> - pte_idx = (address >> 12) & ((1ULL << (9 + >>> page_table_size)) - 1); >>> + pte_idx = (address >> (12 + page_table_size - >>> 4)) & >>> ((1ULL << (9 + page_table_size)) - 1); >>> >>> // read PDE entry >>> umr_read_vram(asic, 0xFFFF, page_table_base_addr >>> + pde_idx * 8, 8, &pde_entry); >>> @@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic, >>> uint32_t vmid, uint64_t addre >>> return 0; >>> } >>> >>> +static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, >>> uint64_t >>> address, uint32_t size, void *dst) >>> +{ >>> + uint64_t start_addr, page_table_start_addr, page_table_base_addr, >>> + page_table_size, pte_idx, pde_idx, pte_entry, pde_entry, >>> + pde_address; >>> + uint32_t chunk_size, tmp; >>> + int page_table_depth, first; >>> + struct { >>> + uint64_t >>> + frag_size, >>> + pte_base_addr, >>> + valid; >>> + } pde_fields; >>> + struct { >>> + uint64_t >>> + page_base_addr, >>> + fragment, >>> + system, >>> + valid; >>> + } pte_fields; >>> + char buf[64]; >>> + 
unsigned char *pdst = dst; >>> + >>> + /* >>> + * PTE format on VI: >>> + * 63:40 reserved >>> + * 39:12 4k physical page base address >>> + * 11:7 fragment >>> + * 6 write >>> + * 5 read >>> + * 4 exe >>> + * 3 reserved >>> + * 2 snooped >>> + * 1 system >>> + * 0 valid >>> + * >>> + * PDE format on VI: >>> + * 63:59 block fragment size >>> + * 58:40 reserved >>> + * 39:1 physical base address of PTE >>> + * bits 5:1 must be 0. >>> + * 0 valid >>> + */ >>> + >>> + // read vm registers >>> + sprintf(buf, >>> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid); >>> + page_table_start_addr = >>> (uint64_t)umr_read_reg_by_name(asic, buf) << 12; >>> + sprintf(buf, >>> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid); >>> + page_table_start_addr |= >>> (uint64_t)umr_read_reg_by_name(asic, buf) << 44; >>> + >>> + sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid); >>> + tmp = umr_read_reg_by_name(asic, buf); >>> + page_table_depth = umr_bitslice_reg_by_name(asic, >>> buf, >>> "PAGE_TABLE_DEPTH", tmp); >>> + page_table_size = umr_bitslice_reg_by_name(asic, >>> buf, >>> "PAGE_TABLE_BLOCK_SIZE", tmp); >>> + >>> + sprintf(buf, >>> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid); >>> + page_table_base_addr = >>> (uint64_t)umr_read_reg_by_name(asic, buf) << 0; >>> + sprintf(buf, >>> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid); >>> + page_table_base_addr |= >>> (uint64_t)umr_read_reg_by_name(asic, buf) << 32; >>> + >>> + DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address); >>> + DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long >>> long)page_table_start_addr); >>> + DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long >>> long)page_table_base_addr); >>> + DEBUG("BASE_SIZE = %lu\n", page_table_size); >>> + DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth); >>> + >>> + address -= page_table_start_addr; >>> + >>> + // AI+ allows 0=default (4KB) whereas VI requires it to be >>> explictly set >>> to >=4 >>> + page_table_size = page_table_size ? 
page_table_size : 4; >>> + >>> + first = 1; >>> + while (size) { >>> + if (page_table_depth >= 1) { >>> + // page_table_base_addr is not a PDE entry in this >>> config so shift it out (it's a page address) >>> + page_table_base_addr <<= 12; >>> + pte_idx = (address >> (12 + page_table_size - >>> 4)) & >>> ((1ULL << (9 + page_table_size)) - 1); >>> + >>> + // AI+ supports more than 1 level of PDEs so we >>> iterate for all of the depths >>> + pde_address = address; >>> + while (page_table_depth) { >>> + // decode addr into pte and pde >>> selectors... >>> + pde_idx = (pde_address >> >>> (page_table_depth*9 + (12 + page_table_size - 4))); >>> + >>> + // don't mask the first PDE idx >>> + if (!first) >>> + pde_idx &= (1ULL << 9) - 1; >>> + first = 0; >>> + >>> + // read PDE entry >>> + umr_read_vram(asic, 0xFFFF, >>> page_table_base_addr + pde_idx * 8, 8, &pde_entry); >>> + >>> + // decode PDE values >>> + pde_fields.frag_size = (pde_entry >>> >> 59) & >>> 0x1F; >>> + pde_fields.pte_base_addr = pde_entry & >>> 0xFFFFFFFFF000ULL; >>> + pde_fields.valid = pde_entry & 1; >>> + DEBUG("pde_idx=%llx, frag_size=%u, >>> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, >>> (unsigned)pde_fields.frag_size, (unsigned long >>> long)pde_fields.pte_base_addr, (int)pde_fields.valid); >>> + >>> + // for the next round the address we're >>> decoding is the phys address in the currently decoded PDE >>> + --page_table_depth; >>> + pde_address = pde_fields.pte_base_addr; >>> + } >>> + >>> + // now read PTE entry for this page >>> + umr_read_vram(asic, 0xFFFF, >>> pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry); >>> + >>> + // decode PTE values >>> + pte_fields.page_base_addr = pte_entry & >>> 0xFFFFFFFFF000ULL; >>> + pte_fields.fragment = (pte_entry >> 7) & >>> 0x1F; >>> + pte_fields.system = (pte_entry >> 1) & 1; >>> + pte_fields.valid = pte_entry & 1; >>> + DEBUG("pte_idx=%llx, page_base_addr=0x%llx, >>> fragment=%u, system=%d, valid=%d\n", (unsigned long 
long)pte_idx, >>> (unsigned long long)pte_fields.page_base_addr, >>> (unsigned)pte_fields.fragment, (int)pte_fields.system, >>> (int)pte_fields.valid); >>> + >>> + // compute starting address >>> + start_addr = pte_fields.page_base_addr + (address >>> & 0xFFF); >>> + } else { >>> + // in AI+ the BASE_ADDR is treated like a PDE >>> entry... >>> + // decode PDE values >>> + pde_idx = 0; // unused >>> + pde_fields.frag_size = >>> (page_table_base_addr >> >>> 59) & 0x1F; >>> + pde_fields.pte_base_addr = page_table_base_addr >>> & 0xFFFFFFFFF000ULL; >>> + pde_fields.valid = page_table_base_addr >>> & 1; >>> + DEBUG("pde_idx=%llx, frag_size=%u, >>> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, >>> (unsigned)pde_fields.frag_size, (unsigned long >>> long)pde_fields.pte_base_addr, (int)pde_fields.valid); >>> + >>> + // PTE addr = baseaddr[47:6] + (logical - >>> start) >> >>> fragsize) >>> + pte_idx = (address >> (12 + >>> pde_fields.frag_size)); >>> + >>> + umr_read_vram(asic, 0xFFFF, >>> pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry); >>> + >>> + // decode PTE values >>> + pte_fields.page_base_addr = pte_entry & >>> 0xFFFFFFFF000ULL; >>> + pte_fields.fragment = (pte_entry >> 7) & >>> 0x1F; >>> + pte_fields.system = (pte_entry >> 1) & 1; >>> + pte_fields.valid = pte_entry & 1; >>> + DEBUG("pte_idx=%llx, page_base_addr=0x%llx, >>> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, >>> (unsigned long long)pte_fields.page_base_addr, >>> (unsigned)pte_fields.fragment, (int)pte_fields.system, >>> (int)pte_fields.valid); >>> + >>> + // compute starting address >>> + start_addr = pte_fields.page_base_addr + (address >>> & 0xFFF); >>> + } >>> + >>> + // read upto 4K from it >>> + // TODO: Support page sizes >4KB >>> + if (((start_addr & 0xFFF) + size) & ~0xFFF) { >>> + chunk_size = 0x1000 - (start_addr & 0xFFF); >>> + } else { >>> + chunk_size = size; >>> + } >>> + DEBUG("Computed address we will read from: %s:%llx >>> (reading: %lu 
bytes)\n", pte_fields.system ? "sys" : "vram", >>> (unsigned long >>> long)start_addr, (unsigned long)chunk_size); >>> + if (pte_fields.system) { >>> + if (umr_read_sram(start_addr, chunk_size, pdst) >>> < 0) >>> { >>> + fprintf(stderr, "[ERROR] Cannot read >>> system >>> ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n"); >>> + fprintf(stderr, "[ERROR] Alternatively >>> download and install /dev/fmem\n"); >>> + return -1; >>> + } >>> + } else { >>> + if (umr_read_vram(asic, 0xFFFF, start_addr, >>> chunk_size, pdst) < 0) { >>> + fprintf(stderr, "[ERROR] Cannot read from >>> VRAM\n"); >>> + return -1; >>> + } >>> + } >>> + pdst += chunk_size; >>> + size -= chunk_size; >>> + address += chunk_size; >>> + } >>> + return 0; >>> +} >>> >>> int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t >>> address, >>> uint32_t size, void *dst) >>> { >>> @@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t >>> vmid, uint64_t address, uint32 >>> return 0; >>> } >>> >>> - if (asic->family == FAMILY_VI) >>> - return umr_read_vram_vi(asic, vmid, address, size, dst); >>> + switch (asic->family) { >>> + case FAMILY_VI: >>> + return umr_read_vram_vi(asic, vmid, address, size, >>> dst); >>> + case FAMILY_AI: >>> + return umr_read_vram_ai(asic, vmid, address, size, >>> dst); >>> + default: >>> + fprintf(stderr, "[BUG] Unsupported ASIC family >>> type >>> for umr_read_vram()\n"); >>> + return -1; >>> + } >>> >>> return 0; >>> } >>> -- >>> 2.12.0 >>> >>> _______________________________________________ >>> amd-gfx mailing list >>> amd-gfx at lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx