> -----Original Message----- > From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf > Of Tom St Denis > Sent: Thursday, April 06, 2017 1:53 PM > To: amd-gfx at lists.freedesktop.org > Cc: StDenis, Tom > Subject: [PATCH umr] Add initial AI VM decoding > > Tested with VMID0 decodings just fine. Haven't tried VMID1-15 yet. > > Signed-off-by: Tom St Denis <tom.stdenis at amd.com> Looks reasonable: Acked-by: Alex Deucher <alexander.deucher at amd.com> > --- > src/lib/read_vram.c | 180 > ++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 176 insertions(+), 4 deletions(-) > > diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c > index e2087a252c10..4c74f4521857 100644 > --- a/src/lib/read_vram.c > +++ b/src/lib/read_vram.c > @@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t > size, void *dst) > return -1; > } > > - > static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t > address, uint32_t size, void *dst) > { > uint64_t start_addr, page_table_start_addr, page_table_base_addr, > @@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, > uint32_t vmid, uint64_t addre > if (page_table_depth == 1) { > // decode addr into pte and pde selectors... 
> pde_idx = (address >> (12 + 9 + page_table_size)) & > ((1ULL << (40 - 12 - 9 - page_table_size)) - 1); > - pte_idx = (address >> 12) & ((1ULL << (9 + > page_table_size)) - 1); > + pte_idx = (address >> (12 + page_table_size - 4)) & > ((1ULL << (9 + page_table_size)) - 1); > > // read PDE entry > umr_read_vram(asic, 0xFFFF, page_table_base_addr > + pde_idx * 8, 8, &pde_entry); > @@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic, > uint32_t vmid, uint64_t addre > return 0; > } > > +static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t > address, uint32_t size, void *dst) > +{ > + uint64_t start_addr, page_table_start_addr, page_table_base_addr, > + page_table_size, pte_idx, pde_idx, pte_entry, pde_entry, > + pde_address; > + uint32_t chunk_size, tmp; > + int page_table_depth, first; > + struct { > + uint64_t > + frag_size, > + pte_base_addr, > + valid; > + } pde_fields; > + struct { > + uint64_t > + page_base_addr, > + fragment, > + system, > + valid; > + } pte_fields; > + char buf[64]; > + unsigned char *pdst = dst; > + > + /* > + * PTE format on VI: > + * 63:40 reserved > + * 39:12 4k physical page base address > + * 11:7 fragment > + * 6 write > + * 5 read > + * 4 exe > + * 3 reserved > + * 2 snooped > + * 1 system > + * 0 valid > + * > + * PDE format on VI: > + * 63:59 block fragment size > + * 58:40 reserved > + * 39:1 physical base address of PTE > + * bits 5:1 must be 0. 
> + * 0 valid > + */ > + > + // read vm registers > + sprintf(buf, > "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid); > + page_table_start_addr = > (uint64_t)umr_read_reg_by_name(asic, buf) << 12; > + sprintf(buf, > "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid); > + page_table_start_addr |= > (uint64_t)umr_read_reg_by_name(asic, buf) << 44; > + > + sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid); > + tmp = umr_read_reg_by_name(asic, buf); > + page_table_depth = umr_bitslice_reg_by_name(asic, buf, > "PAGE_TABLE_DEPTH", tmp); > + page_table_size = umr_bitslice_reg_by_name(asic, buf, > "PAGE_TABLE_BLOCK_SIZE", tmp); > + > + sprintf(buf, > "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid); > + page_table_base_addr = > (uint64_t)umr_read_reg_by_name(asic, buf) << 0; > + sprintf(buf, > "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid); > + page_table_base_addr |= > (uint64_t)umr_read_reg_by_name(asic, buf) << 32; > + > + DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address); > + DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long > long)page_table_start_addr); > + DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long > long)page_table_base_addr); > + DEBUG("BASE_SIZE = %lu\n", page_table_size); > + DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth); > + > + address -= page_table_start_addr; > + > + // AI+ allows 0=default (4KB) whereas VI requires it to be explicitly set > to >=4 > + page_table_size = page_table_size ? page_table_size : 4; > + > + first = 1; > + while (size) { > + if (page_table_depth >= 1) { > + // page_table_base_addr is not a PDE entry in this > config so shift it out (it's a page address) > + page_table_base_addr <<= 12; > + pte_idx = (address >> (12 + page_table_size - 4)) & > ((1ULL << (9 + page_table_size)) - 1); > + > + // AI+ supports more than 1 level of PDEs so we > iterate for all of the depths > + pde_address = address; > + while (page_table_depth) { > + // decode addr into pte and pde selectors... 
> + pde_idx = (pde_address >> > (page_table_depth*9 + (12 + page_table_size - 4))); > + > + // don't mask the first PDE idx > + if (!first) > + pde_idx &= (1ULL << 9) - 1; > + first = 0; > + > + // read PDE entry > + umr_read_vram(asic, 0xFFFF, > page_table_base_addr + pde_idx * 8, 8, &pde_entry); > + > + // decode PDE values > + pde_fields.frag_size = (pde_entry >> 59) & > 0x1F; > + pde_fields.pte_base_addr = pde_entry & > 0xFFFFFFFFF000ULL; > + pde_fields.valid = pde_entry & 1; > + DEBUG("pde_idx=%llx, frag_size=%u, > pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, > (unsigned)pde_fields.frag_size, (unsigned long > long)pde_fields.pte_base_addr, (int)pde_fields.valid); > + > + // for the next round the address we're > decoding is the phys address in the currently decoded PDE > + --page_table_depth; > + pde_address = pde_fields.pte_base_addr; > + } > + > + // now read PTE entry for this page > + umr_read_vram(asic, 0xFFFF, > pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry); > + > + // decode PTE values > + pte_fields.page_base_addr = pte_entry & > 0xFFFFFFFFF000ULL; > + pte_fields.fragment = (pte_entry >> 7) & 0x1F; > + pte_fields.system = (pte_entry >> 1) & 1; > + pte_fields.valid = pte_entry & 1; > + DEBUG("pte_idx=%llx, page_base_addr=0x%llx, > fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, > (unsigned long long)pte_fields.page_base_addr, > (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid); > + > + // compute starting address > + start_addr = pte_fields.page_base_addr + (address > & 0xFFF); > + } else { > + // in AI+ the BASE_ADDR is treated like a PDE entry... 
> + // decode PDE values > + pde_idx = 0; // unused > + pde_fields.frag_size = (page_table_base_addr >> > 59) & 0x1F; > + pde_fields.pte_base_addr = page_table_base_addr > & 0xFFFFFFFFF000ULL; > + pde_fields.valid = page_table_base_addr & 1; > + DEBUG("pde_idx=%llx, frag_size=%u, > pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, > (unsigned)pde_fields.frag_size, (unsigned long > long)pde_fields.pte_base_addr, (int)pde_fields.valid); > + > + // PTE addr = baseaddr[47:6] + ((logical - start) >> > fragsize) > + pte_idx = (address >> (12 + pde_fields.frag_size)); > + > + umr_read_vram(asic, 0xFFFF, > pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry); > + > + // decode PTE values > + pte_fields.page_base_addr = pte_entry & > 0xFFFFFFFF000ULL; > + pte_fields.fragment = (pte_entry >> 7) & 0x1F; > + pte_fields.system = (pte_entry >> 1) & 1; > + pte_fields.valid = pte_entry & 1; > + DEBUG("pte_idx=%llx, page_base_addr=0x%llx, > fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, > (unsigned long long)pte_fields.page_base_addr, > (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid); > + > + // compute starting address > + start_addr = pte_fields.page_base_addr + (address > & 0xFFF); > + } > + > + // read up to 4K from it > + // TODO: Support page sizes >4KB > + if (((start_addr & 0xFFF) + size) & ~0xFFF) { > + chunk_size = 0x1000 - (start_addr & 0xFFF); > + } else { > + chunk_size = size; > + } > + DEBUG("Computed address we will read from: %s:%llx > (reading: %lu bytes)\n", pte_fields.system ? 
"sys" : "vram", (unsigned long > long)start_addr, (unsigned long)chunk_size); > + if (pte_fields.system) { > + if (umr_read_sram(start_addr, chunk_size, pdst) < 0) > { > + fprintf(stderr, "[ERROR] Cannot read system > ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n"); > + fprintf(stderr, "[ERROR] Alternatively > download and install /dev/fmem\n"); > + return -1; > + } > + } else { > + if (umr_read_vram(asic, 0xFFFF, start_addr, > chunk_size, pdst) < 0) { > + fprintf(stderr, "[ERROR] Cannot read from > VRAM\n"); > + return -1; > + } > + } > + pdst += chunk_size; > + size -= chunk_size; > + address += chunk_size; > + } > + return 0; > +} > > int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, > uint32_t size, void *dst) > { > @@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t > vmid, uint64_t address, uint32 > return 0; > } > > - if (asic->family == FAMILY_VI) > - return umr_read_vram_vi(asic, vmid, address, size, dst); > + switch (asic->family) { > + case FAMILY_VI: > + return umr_read_vram_vi(asic, vmid, address, size, > dst); > + case FAMILY_AI: > + return umr_read_vram_ai(asic, vmid, address, size, > dst); > + default: > + fprintf(stderr, "[BUG] Unsupported ASIC family type > for umr_read_vram()\n"); > + return -1; > + } > > return 0; > } > -- > 2.12.0 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx