Brings decoding of PDEs and PTEs for AI+ chips into their own functions, so that we don't end up with subtly different decoding bugs in the variety of places such decodings are done. Also fixes a minor bug where we were pulling PTE.PRT from bit 61 instead of the proper bit 51. Signed-off-by: Joseph Greathouse <Joseph.Greathouse@xxxxxxx> --- src/lib/read_vram.c | 187 ++++++++++++++++++++++++++------------------ 1 file changed, 109 insertions(+), 78 deletions(-) diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c index 049acd4..2998873 100644 --- a/src/lib/read_vram.c +++ b/src/lib/read_vram.c @@ -317,6 +317,104 @@ static uint64_t log2_vm_size(uint64_t page_table_start_addr, uint64_t page_table return vm_bits; } +typedef struct { + uint64_t + frag_size, + pte_base_addr, + valid, + system, + coherent, + pte, + further; +} pde_fields_ai_t; + +typedef struct { + uint64_t + valid, + system, + coherent, + tmz, + execute, + read, + write, + fragment, + page_base_addr, + prt, + pde, + further, + mtype; +} pte_fields_ai_t; + +/* + * PDE format on AI: + * 63:59 block fragment size + * 58:55 reserved + * But if bit 56 is set, this is a PTE with 'further' set, + * which makes it act like a PDE. + * 54 pde-is-pte + * 53:48 reserved + * 47:6 physical base address of PTE + * 2 cache coherent/snoop + * 1 system + * 0 valid + */ +static pde_fields_ai_t decode_pde_entry_ai(uint64_t pde_entry) +{ + pde_fields_ai_t pde_fields; + pde_fields.frag_size = (pde_entry >> 59) & 0x1F; + pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFFFC0ULL; + pde_fields.valid = pde_entry & 1; + pde_fields.system = (pde_entry >> 1) & 1; + pde_fields.coherent = (pde_entry >> 2) & 1; + pde_fields.pte = (pde_entry >> 54) & 1; + pde_fields.further = (pde_entry >> 56) & 1; + return pde_fields; +} + +/* + * PTE format on AI and PI: + * 58:57 mtype + * 56 further + * 54 reserved + * But if it is set, then this is actually a PDE with 'P' + * bit set, which makes the PDE act like a PTE. 
+ * 51 prt + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 tmz (PI+) + * 2 snooped / coherent + * 1 system + * 0 valid + */ +static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry) +{ + pte_fields_ai_t pte_fields; + pte_fields.valid = pte_entry & 1; + pte_fields.system = (pte_entry >> 1) & 1; + pte_fields.coherent = (pte_entry >> 2) & 1; + pte_fields.tmz = (pte_entry >> 3) & 1; + pte_fields.execute = (pte_entry >> 4) & 1; + pte_fields.read = (pte_entry >> 5) & 1; + pte_fields.write = (pte_entry >> 6) & 1; + pte_fields.fragment = (pte_entry >> 7) & 0x1F; + pte_fields.prt = (pte_entry >> 51) & 1; + pte_fields.pde = (pte_entry >> 54) & 1; + pte_fields.further = (pte_entry >> 56) & 1; + pte_fields.mtype = (pte_entry >> 57) & 3; + + // PTEs hold physical address in 47:12 + // PDEs hold physical address in 47:6, so if this is a PTE-as-PDE (further), need a different mask + if (pte_fields.further) + pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL; + else + pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL; + + return pte_fields; +} + /** * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms */ @@ -352,24 +450,9 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, mmMC_VM_AGP_BOT, mmMC_VM_AGP_TOP; } registers; - struct { - uint64_t - frag_size, - pte_base_addr, - valid, - system, - cache, - pte; - } pde_fields, pde_array[8]; - struct { - uint64_t - page_base_addr, - fragment, - system, - valid, - prt, - further; - } pte_fields; + + pde_fields_ai_t pde_fields, pde_array[8]; + pte_fields_ai_t pte_fields; char buf[64]; unsigned char *pdst = dst; char *hub, *vm0prefix, *regprefix; @@ -379,27 +462,6 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, memset(&registers, 0, sizeof registers); memset(&pde_array, 0xff, sizeof pde_array); - /* - * PTE format on AI: - * 47:12 4k physical page base address - * 11:7 fragment - * 6 write - * 5 read - * 4 exe - * 3 
reserved - * 2 snooped - * 1 system - * 0 valid - * - * PDE format on AI: - * 63:59 block fragment size - * 58:40 reserved - * 47:6 physical base address of PTE - * 2 cache coherent/snoop - * 1 system - * 0 valid - */ - hubid = vmid & 0xFF00; vmid &= 0xFF; @@ -627,13 +689,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, further = 0; if (page_table_depth >= 1) { - // decode PDE values - pde_fields.frag_size = (pde_entry >> 59) & 0x1F; - pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL; - pde_fields.valid = pde_entry & 1; - pde_fields.system = (pde_entry >> 1) & 1; - pde_fields.cache = (pde_entry >> 2) & 1; - pde_fields.pte = (pde_entry >> 54) & 1; + pde_fields = decode_pde_entry_ai(pde_entry); // AI+ supports more than 1 level of PDEs so we iterate for all of the depths pde_address = pde_fields.pte_base_addr; @@ -663,7 +719,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, pde_fields.pte_base_addr, pde_fields.valid, pde_fields.system, - pde_fields.cache, + pde_fields.coherent, pde_fields.pte); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); @@ -712,13 +768,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, } } - // decode PDE values - pde_fields.frag_size = (pde_entry >> 59) & 0x1F; - pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL; - pde_fields.valid = pde_entry & 1; - pde_fields.system = (pde_entry >> 1) & 1; - pde_fields.cache = (pde_entry >> 2) & 1; - pde_fields.pte = (pde_entry >> 54) & 1; + pde_fields = decode_pde_entry_ai(pde_entry); if (current_depth == 1) { pde0_block_fragment_size = pde_fields.frag_size; /* @@ -751,7 +801,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, pde_fields.pte_base_addr, pde_fields.valid, pde_fields.system, - pde_fields.cache, + pde_fields.coherent, pde_fields.pte, pde_fields.frag_size); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); @@ -817,14 +867,8 @@ pte_further: return -1; } - // decode 
PTE values pde_is_pte: - pte_fields.fragment = (pte_entry >> 7) & 0x1F; - pte_fields.system = (pte_entry >> 1) & 1; - pte_fields.valid = pte_entry & 1; - pte_fields.prt = (pte_entry >> 61) & 1; - pte_fields.further = (pte_entry >> 56) & 1; - pte_fields.page_base_addr = pte_entry & (pte_fields.further ? 0xFFFFFFFFFFC0ULL : 0xFFFFFFFFF000ULL); + pte_fields = decode_pte_entry_ai(pte_entry); if (asic->options.verbose) asic->mem_funcs.vm_message("%s %s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n", @@ -901,12 +945,7 @@ pde_is_pte: va_mask &= (upper_mask & ~pte_page_mask); // grab PTE base address and other data from the PTE that has the F bit set. - pde_fields.frag_size = (pte_entry >> 59) & 0x1F; - pde_fields.pte_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL; - pde_fields.valid = pte_entry & 1; - pde_fields.system = (pte_entry >> 1) & 1; - pde_fields.cache = (pte_entry >> 2) & 1; - pde_fields.pte = 0; + pde_fields = decode_pde_entry_ai(pte_entry); further = 1; goto pte_further; } @@ -928,12 +967,9 @@ pde_is_pte: } else { // in AI+ the BASE_ADDR is treated like a PDE entry... 
// decode PDE values - pde_fields.frag_size = (page_table_base_addr >> 59) & 0x1F; + pde_fields = decode_pde_entry_ai(pde_entry); pde0_block_fragment_size = pde_fields.frag_size; pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1; - pde_fields.pte_base_addr = page_table_base_addr & 0xFFFFFFFFF000ULL; - pde_fields.system = (page_table_base_addr >> 1) & 1; - pde_fields.valid = page_table_base_addr & 1; if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose) asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n", @@ -953,12 +989,7 @@ pde_is_pte: if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0) return -1; - // decode PTE values - pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL; - pte_fields.fragment = (pte_entry >> 7) & 0x1F; - pte_fields.system = (pte_entry >> 1) & 1; - pte_fields.valid = pte_entry & 1; - pte_fields.prt = 0; + pte_fields = decode_pte_entry_ai(pte_entry); if (asic->options.verbose) asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n", -- 2.20.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx