> Thanks. What I'm leery about is the page_table_size. On VI it's always 4KB (with our kernel) so the logic isn't vetted much in umr. Actually that assumption is completely incorrect. On VI the PT and PD size are usually much larger than 4KB because they only support one extra level of page tables. Christian. Am 06.04.2017 um 20:19 schrieb Tom St Denis: > The more I think about it I wonder if > > // AI+ allows 0=default (4KB) whereas VI requires it to be explictly set to >=4 > page_table_size = page_table_size ? page_table_size : 4; > > Should be instead > > page_table_size += 4; > > Since in the later code > > pde_idx = (pde_address >> (page_table_depth*9 + (12 + page_table_size - 4))); > > Would result in a 9-bit page size if say page_table_size was 1 which should actually be 13 bits (8KB). > > Tom > > On 06/04/17 02:14 PM, Tom St Denis wrote: >> On 06/04/17 02:11 PM, Deucher, Alexander wrote: >>>> -----Original Message----- >>>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf Of Tom St Denis >>>> Sent: Thursday, April 06, 2017 1:53 PM >>>> To: amd-gfx at lists.freedesktop.org >>>> Cc: StDenis, Tom >>>> Subject: [PATCH umr] Add initial AI VM decoding >>>> >>>> Tested with VMID0 decodings just fine. Haven't tried VMID1-15 yet. >>>> >>>> Signed-off-by: Tom St Denis <tom.stdenis at amd.com> >>> >>> Looks reasonable: >>> Acked-by: Alex Deucher <alexander.deucher at amd.com> >> >> Thanks. What I'm leery about is the page_table_size. On VI it's always 4KB (with our kernel) so the logic isn't vetted much in umr. >> >> Then to add to it it seems AI+ interpretation is a bit different with 0==4KB (instead of 4). I presume values greater than 0 are for multiples of 4KB (or multiples of 512 8-byte PDE/PTE entries) e.g. 1 = 8KB of next level entries, etc. >> >> I'll try adding a stall to libdrm's GFX test again and see if the VMID >= 1 decoding works in the meantime.
>> >> Cheers, >> Tom >> >> >> >>> >>>> --- >>>> src/lib/read_vram.c | 180 >>>> ++++++++++++++++++++++++++++++++++++++++++++++++++-- >>>> 1 file changed, 176 insertions(+), 4 deletions(-) >>>> >>>> diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c >>>> index e2087a252c10..4c74f4521857 100644 >>>> --- a/src/lib/read_vram.c >>>> +++ b/src/lib/read_vram.c >>>> @@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t >>>> size, void *dst) >>>> return -1; >>>> } >>>> >>>> - >>>> static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, >>>> uint64_t >>>> address, uint32_t size, void *dst) >>>> { >>>> uint64_t start_addr, page_table_start_addr, >>>> page_table_base_addr, >>>> @@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, >>>> uint32_t vmid, uint64_t addre >>>> if (page_table_depth == 1) { >>>> // decode addr into pte and pde selectors... >>>> pde_idx = (address >> (12 + 9 + >>>> page_table_size)) & >>>> ((1ULL << (40 - 12 - 9 - page_table_size)) - 1); >>>> - pte_idx = (address >> 12) & ((1ULL << (9 + >>>> page_table_size)) - 1); >>>> + pte_idx = (address >> (12 + page_table_size - >>>> 4)) & >>>> ((1ULL << (9 + page_table_size)) - 1); >>>> >>>> // read PDE entry >>>> umr_read_vram(asic, 0xFFFF, >>>> page_table_base_addr >>>> + pde_idx * 8, 8, &pde_entry); >>>> @@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic >>>> *asic, >>>> uint32_t vmid, uint64_t addre >>>> return 0; >>>> } >>>> >>>> +static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, >>>> uint64_t >>>> address, uint32_t size, void *dst) >>>> +{ >>>> + uint64_t start_addr, page_table_start_addr, >>>> page_table_base_addr, >>>> + page_table_size, pte_idx, pde_idx, pte_entry, >>>> pde_entry, >>>> + pde_address; >>>> + uint32_t chunk_size, tmp; >>>> + int page_table_depth, first; >>>> + struct { >>>> + uint64_t >>>> + frag_size, >>>> + pte_base_addr, >>>> + valid; >>>> + } pde_fields; >>>> + struct { >>>> + uint64_t >>>> + page_base_addr, 
>>>> + fragment, >>>> + system, >>>> + valid; >>>> + } pte_fields; >>>> + char buf[64]; >>>> + unsigned char *pdst = dst; >>>> + >>>> + /* >>>> + * PTE format on VI: >>>> + * 63:40 reserved >>>> + * 39:12 4k physical page base address >>>> + * 11:7 fragment >>>> + * 6 write >>>> + * 5 read >>>> + * 4 exe >>>> + * 3 reserved >>>> + * 2 snooped >>>> + * 1 system >>>> + * 0 valid >>>> + * >>>> + * PDE format on VI: >>>> + * 63:59 block fragment size >>>> + * 58:40 reserved >>>> + * 39:1 physical base address of PTE >>>> + * bits 5:1 must be 0. >>>> + * 0 valid >>>> + */ >>>> + >>>> + // read vm registers >>>> + sprintf(buf, >>>> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid); >>>> + page_table_start_addr = >>>> (uint64_t)umr_read_reg_by_name(asic, buf) << 12; >>>> + sprintf(buf, >>>> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid); >>>> + page_table_start_addr |= >>>> (uint64_t)umr_read_reg_by_name(asic, buf) << 44; >>>> + >>>> + sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid); >>>> + tmp = umr_read_reg_by_name(asic, buf); >>>> + page_table_depth = umr_bitslice_reg_by_name(asic, >>>> buf, >>>> "PAGE_TABLE_DEPTH", tmp); >>>> + page_table_size = umr_bitslice_reg_by_name(asic, >>>> buf, >>>> "PAGE_TABLE_BLOCK_SIZE", tmp); >>>> + >>>> + sprintf(buf, >>>> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid); >>>> + page_table_base_addr = >>>> (uint64_t)umr_read_reg_by_name(asic, buf) << 0; >>>> + sprintf(buf, >>>> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid); >>>> + page_table_base_addr |= >>>> (uint64_t)umr_read_reg_by_name(asic, buf) << 32; >>>> + >>>> + DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address); >>>> + DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long >>>> long)page_table_start_addr); >>>> + DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long >>>> long)page_table_base_addr); >>>> + DEBUG("BASE_SIZE = %lu\n", page_table_size); >>>> + DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth); >>>> + >>>> + address -= 
page_table_start_addr; >>>> + >>>> + // AI+ allows 0=default (4KB) whereas VI requires it to be >>>> explictly set >>>> to >=4 >>>> + page_table_size = page_table_size ? page_table_size : 4; >>>> + >>>> + first = 1; >>>> + while (size) { >>>> + if (page_table_depth >= 1) { >>>> + // page_table_base_addr is not a PDE entry in >>>> this >>>> config so shift it out (it's a page address) >>>> + page_table_base_addr <<= 12; >>>> + pte_idx = (address >> (12 + page_table_size - >>>> 4)) & >>>> ((1ULL << (9 + page_table_size)) - 1); >>>> + >>>> + // AI+ supports more than 1 level of PDEs so we >>>> iterate for all of the depths >>>> + pde_address = address; >>>> + while (page_table_depth) { >>>> + // decode addr into pte and pde >>>> selectors... >>>> + pde_idx = (pde_address >> >>>> (page_table_depth*9 + (12 + page_table_size - 4))); >>>> + >>>> + // don't mask the first PDE idx >>>> + if (!first) >>>> + pde_idx &= (1ULL << 9) - 1; >>>> + first = 0; >>>> + >>>> + // read PDE entry >>>> + umr_read_vram(asic, 0xFFFF, >>>> page_table_base_addr + pde_idx * 8, 8, &pde_entry); >>>> + >>>> + // decode PDE values >>>> + pde_fields.frag_size = (pde_entry >>>> >> 59) & >>>> 0x1F; >>>> + pde_fields.pte_base_addr = pde_entry & >>>> 0xFFFFFFFFF000ULL; >>>> + pde_fields.valid = pde_entry >>>> & 1; >>>> + DEBUG("pde_idx=%llx, frag_size=%u, >>>> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, >>>> (unsigned)pde_fields.frag_size, (unsigned long >>>> long)pde_fields.pte_base_addr, (int)pde_fields.valid); >>>> + >>>> + // for the next round the address we're >>>> decoding is the phys address in the currently decoded PDE >>>> + --page_table_depth; >>>> + pde_address = pde_fields.pte_base_addr; >>>> + } >>>> + >>>> + // now read PTE entry for this page >>>> + umr_read_vram(asic, 0xFFFF, >>>> pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry); >>>> + >>>> + // decode PTE values >>>> + pte_fields.page_base_addr = pte_entry & >>>> 0xFFFFFFFFF000ULL; >>>> + pte_fields.fragment = 
(pte_entry >> 7) & >>>> 0x1F; >>>> + pte_fields.system = (pte_entry >> 1) >>>> & 1; >>>> + pte_fields.valid = pte_entry & 1; >>>> + DEBUG("pte_idx=%llx, page_base_addr=0x%llx, >>>> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, >>>> (unsigned long long)pte_fields.page_base_addr, >>>> (unsigned)pte_fields.fragment, (int)pte_fields.system, >>>> (int)pte_fields.valid); >>>> + >>>> + // compute starting address >>>> + start_addr = pte_fields.page_base_addr + >>>> (address >>>> & 0xFFF); >>>> + } else { >>>> + // in AI+ the BASE_ADDR is treated like a PDE >>>> entry... >>>> + // decode PDE values >>>> + pde_idx = 0; // unused >>>> + pde_fields.frag_size = >>>> (page_table_base_addr >> >>>> 59) & 0x1F; >>>> + pde_fields.pte_base_addr = page_table_base_addr >>>> & 0xFFFFFFFFF000ULL; >>>> + pde_fields.valid = page_table_base_addr >>>> & 1; >>>> + DEBUG("pde_idx=%llx, frag_size=%u, >>>> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, >>>> (unsigned)pde_fields.frag_size, (unsigned long >>>> long)pde_fields.pte_base_addr, (int)pde_fields.valid); >>>> + >>>> + // PTE addr = baseaddr[47:6] + (logical - >>>> start) >> >>>> fragsize) >>>> + pte_idx = (address >> (12 + >>>> pde_fields.frag_size)); >>>> + >>>> + umr_read_vram(asic, 0xFFFF, >>>> pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry); >>>> + >>>> + // decode PTE values >>>> + pte_fields.page_base_addr = pte_entry & >>>> 0xFFFFFFFF000ULL; >>>> + pte_fields.fragment = (pte_entry >> 7) & >>>> 0x1F; >>>> + pte_fields.system = (pte_entry >> 1) >>>> & 1; >>>> + pte_fields.valid = pte_entry & 1; >>>> + DEBUG("pte_idx=%llx, page_base_addr=0x%llx, >>>> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, >>>> (unsigned long long)pte_fields.page_base_addr, >>>> (unsigned)pte_fields.fragment, (int)pte_fields.system, >>>> (int)pte_fields.valid); >>>> + >>>> + // compute starting address >>>> + start_addr = pte_fields.page_base_addr + >>>> (address >>>> & 0xFFF); >>>> + } >>>> + 
>>>> + // read upto 4K from it >>>> + // TODO: Support page sizes >4KB >>>> + if (((start_addr & 0xFFF) + size) & ~0xFFF) { >>>> + chunk_size = 0x1000 - (start_addr & 0xFFF); >>>> + } else { >>>> + chunk_size = size; >>>> + } >>>> + DEBUG("Computed address we will read from: %s:%llx >>>> (reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram", >>>> (unsigned long >>>> long)start_addr, (unsigned long)chunk_size); >>>> + if (pte_fields.system) { >>>> + if (umr_read_sram(start_addr, chunk_size, pdst) >>>> < 0) >>>> { >>>> + fprintf(stderr, "[ERROR] Cannot read >>>> system >>>> ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n"); >>>> + fprintf(stderr, "[ERROR] Alternatively >>>> download and install /dev/fmem\n"); >>>> + return -1; >>>> + } >>>> + } else { >>>> + if (umr_read_vram(asic, 0xFFFF, start_addr, >>>> chunk_size, pdst) < 0) { >>>> + fprintf(stderr, "[ERROR] Cannot read >>>> from >>>> VRAM\n"); >>>> + return -1; >>>> + } >>>> + } >>>> + pdst += chunk_size; >>>> + size -= chunk_size; >>>> + address += chunk_size; >>>> + } >>>> + return 0; >>>> +} >>>> >>>> int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t >>>> address, >>>> uint32_t size, void *dst) >>>> { >>>> @@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t >>>> vmid, uint64_t address, uint32 >>>> return 0; >>>> } >>>> >>>> - if (asic->family == FAMILY_VI) >>>> - return umr_read_vram_vi(asic, vmid, address, size, dst); >>>> + switch (asic->family) { >>>> + case FAMILY_VI: >>>> + return umr_read_vram_vi(asic, vmid, address, >>>> size, >>>> dst); >>>> + case FAMILY_AI: >>>> + return umr_read_vram_ai(asic, vmid, address, >>>> size, >>>> dst); >>>> + default: >>>> + fprintf(stderr, "[BUG] Unsupported ASIC family >>>> type >>>> for umr_read_vram()\n"); >>>> + return -1; >>>> + } >>>> >>>> return 0; >>>> } >>>> -- >>>> 2.12.0 >>>> >>>> _______________________________________________ >>>> amd-gfx mailing list >>>> amd-gfx at lists.freedesktop.org >>>> 
https://lists.freedesktop.org/mailman/listinfo/amd-gfx >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx