[PATCH umr] Add initial AI VM decoding

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 06/04/17 02:11 PM, Deucher, Alexander wrote:
>> -----Original Message-----
>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
>> Of Tom St Denis
>> Sent: Thursday, April 06, 2017 1:53 PM
>> To: amd-gfx at lists.freedesktop.org
>> Cc: StDenis, Tom
>> Subject: [PATCH umr] Add initial AI VM decoding
>>
>> Tested with VMID0 decodings just fine.  Haven't tried VMID1-15 yet.
>>
>> Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
>
> Looks reasonable:
> Acked-by: Alex Deucher <alexander.deucher at amd.com>

Thanks.  What I'm leery about is the page_table_size.  On VI it's always 
4KB (with our kernel) so the logic isn't vetted much in umr.

To add to that, it seems the AI+ interpretation is a bit different, with 
0==4KB (instead of 4).  I presume values greater than 0 are for 
multiples of 4KB (or multiples of 512 8-byte PDE/PTE entries), e.g. 1 = 
8KB of next level entries, etc.

I'll try adding a stall to libdrm's GFX test again and see if the VMID 
 >= 1 decoding works in the meantime.

Cheers,
Tom



>
>> ---
>>  src/lib/read_vram.c | 180
>> ++++++++++++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 176 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
>> index e2087a252c10..4c74f4521857 100644
>> --- a/src/lib/read_vram.c
>> +++ b/src/lib/read_vram.c
>> @@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t
>> size, void *dst)
>>        return -1;
>>  }
>>
>> -
>>  static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t
>> address, uint32_t size, void *dst)
>>  {
>>        uint64_t start_addr, page_table_start_addr, page_table_base_addr,
>> @@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic,
>> uint32_t vmid, uint64_t addre
>>                if (page_table_depth == 1) {
>>                        // decode addr into pte and pde selectors...
>>                        pde_idx = (address >> (12 + 9 + page_table_size)) &
>> ((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
>> -                     pte_idx = (address >> 12) & ((1ULL << (9 +
>> page_table_size)) - 1);
>> +                     pte_idx = (address >> (12 + page_table_size - 4)) &
>> ((1ULL << (9 + page_table_size)) - 1);
>>
>>                        // read PDE entry
>>                        umr_read_vram(asic, 0xFFFF, page_table_base_addr
>> + pde_idx * 8, 8, &pde_entry);
>> @@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic,
>> uint32_t vmid, uint64_t addre
>>        return 0;
>>  }
>>
>> +static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t
>> address, uint32_t size, void *dst)
>> +{
>> +     uint64_t start_addr, page_table_start_addr, page_table_base_addr,
>> +              page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
>> +              pde_address;
>> +     uint32_t chunk_size, tmp;
>> +     int page_table_depth, first;
>> +     struct {
>> +             uint64_t
>> +                     frag_size,
>> +                     pte_base_addr,
>> +                     valid;
>> +     } pde_fields;
>> +     struct {
>> +             uint64_t
>> +                     page_base_addr,
>> +                     fragment,
>> +                     system,
>> +                     valid;
>> +     } pte_fields;
>> +     char buf[64];
>> +     unsigned char *pdst = dst;
>> +
>> +     /*
>> +      * PTE format on VI:
>> +      * 63:40 reserved
>> +      * 39:12 4k physical page base address
>> +      * 11:7 fragment
>> +      * 6 write
>> +      * 5 read
>> +      * 4 exe
>> +      * 3 reserved
>> +      * 2 snooped
>> +      * 1 system
>> +      * 0 valid
>> +      *
>> +      * PDE format on VI:
>> +      * 63:59 block fragment size
>> +      * 58:40 reserved
>> +      * 39:1 physical base address of PTE
>> +      * bits 5:1 must be 0.
>> +      * 0 valid
>> +      */
>> +
>> +     // read vm registers
>> +     sprintf(buf,
>> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
>> +             page_table_start_addr =
>> (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
>> +     sprintf(buf,
>> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
>> +             page_table_start_addr |=
>> (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
>> +
>> +     sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
>> +             tmp = umr_read_reg_by_name(asic, buf);
>> +             page_table_depth      = umr_bitslice_reg_by_name(asic, buf,
>> "PAGE_TABLE_DEPTH", tmp);
>> +             page_table_size       = umr_bitslice_reg_by_name(asic, buf,
>> "PAGE_TABLE_BLOCK_SIZE", tmp);
>> +
>> +     sprintf(buf,
>> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
>> +             page_table_base_addr  =
>> (uint64_t)umr_read_reg_by_name(asic, buf) << 0;
>> +     sprintf(buf,
>> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
>> +             page_table_base_addr  |=
>> (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
>> +
>> +     DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
>> +     DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long
>> long)page_table_start_addr);
>> +     DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long
>> long)page_table_base_addr);
>> +     DEBUG("BASE_SIZE = %lu\n", page_table_size);
>> +     DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth);
>> +
>> +     address -= page_table_start_addr;
>> +
>> +     // AI+ allows 0=default (4KB) whereas VI requires it to be explicitly set
>> to >=4
>> +     page_table_size = page_table_size ? page_table_size : 4;
>> +
>> +     first = 1;
>> +     while (size) {
>> +             if (page_table_depth >= 1) {
>> +                     // page_table_base_addr is not a PDE entry in this
>> config so shift it out (it's a page address)
>> +                     page_table_base_addr <<= 12;
>> +                     pte_idx = (address >> (12 + page_table_size - 4)) &
>> ((1ULL << (9 + page_table_size)) - 1);
>> +
>> +                     // AI+ supports more than 1 level of PDEs so we
>> iterate for all of the depths
>> +                     pde_address = address;
>> +                     while (page_table_depth) {
>> +                             // decode addr into pte and pde selectors...
>> +                             pde_idx = (pde_address >>
>> (page_table_depth*9 + (12 + page_table_size - 4)));
>> +
>> +                             // don't mask the first PDE idx
>> +                             if (!first)
>> +                                     pde_idx &= (1ULL << 9) - 1;
>> +                             first = 0;
>> +
>> +                             // read PDE entry
>> +                             umr_read_vram(asic, 0xFFFF,
>> page_table_base_addr + pde_idx * 8, 8, &pde_entry);
>> +
>> +                             // decode PDE values
>> +                             pde_fields.frag_size     = (pde_entry >> 59) &
>> 0x1F;
>> +                             pde_fields.pte_base_addr = pde_entry &
>> 0xFFFFFFFFF000ULL;
>> +                             pde_fields.valid         = pde_entry & 1;
>> +                             DEBUG("pde_idx=%llx, frag_size=%u,
>> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
>> (unsigned)pde_fields.frag_size, (unsigned long
>> long)pde_fields.pte_base_addr, (int)pde_fields.valid);
>> +
>> +                             // for the next round the address we're
>> decoding is the phys address in the currently decoded PDE
>> +                             --page_table_depth;
>> +                             pde_address = pde_fields.pte_base_addr;
>> +                     }
>> +
>> +                     // now read PTE entry for this page
>> +                     umr_read_vram(asic, 0xFFFF,
>> pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry);
>> +
>> +                     // decode PTE values
>> +                     pte_fields.page_base_addr = pte_entry &
>> 0xFFFFFFFFF000ULL;
>> +                     pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
>> +                     pte_fields.system         = (pte_entry >> 1) & 1;
>> +                     pte_fields.valid          = pte_entry & 1;
>> +                     DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
>> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
>> (unsigned long long)pte_fields.page_base_addr,
>> (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
>> +
>> +                     // compute starting address
>> +                     start_addr = pte_fields.page_base_addr + (address
>> & 0xFFF);
>> +             } else {
>> +                     // in AI+ the BASE_ADDR is treated like a PDE entry...
>> +                     // decode PDE values
>> +                     pde_idx = 0; // unused
>> +                     pde_fields.frag_size     = (page_table_base_addr >>
>> 59) & 0x1F;
>> +                     pde_fields.pte_base_addr = page_table_base_addr
>> & 0xFFFFFFFFF000ULL;
>> +                     pde_fields.valid         = page_table_base_addr & 1;
>> +                     DEBUG("pde_idx=%llx, frag_size=%u,
>> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
>> (unsigned)pde_fields.frag_size, (unsigned long
>> long)pde_fields.pte_base_addr, (int)pde_fields.valid);
>> +
>> +                     // PTE addr = baseaddr[47:6] + (logical - start) >>
>> fragsize)
>> +                     pte_idx = (address >> (12 + pde_fields.frag_size));
>> +
>> +                     umr_read_vram(asic, 0xFFFF,
>> pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry);
>> +
>> +                     // decode PTE values
>> +                     pte_fields.page_base_addr = pte_entry &
>> 0xFFFFFFFF000ULL;
>> +                     pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
>> +                     pte_fields.system         = (pte_entry >> 1) & 1;
>> +                     pte_fields.valid          = pte_entry & 1;
>> +                     DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
>> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
>> (unsigned long long)pte_fields.page_base_addr,
>> (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
>> +
>> +                     // compute starting address
>> +                     start_addr = pte_fields.page_base_addr + (address
>> & 0xFFF);
>> +             }
>> +
>> +             // read up to 4K from it
>> +             // TODO: Support page sizes >4KB
>> +             if (((start_addr & 0xFFF) + size) & ~0xFFF) {
>> +                     chunk_size = 0x1000 - (start_addr & 0xFFF);
>> +             } else {
>> +                     chunk_size = size;
>> +             }
>> +             DEBUG("Computed address we will read from: %s:%llx
>> (reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram", (unsigned long
>> long)start_addr, (unsigned long)chunk_size);
>> +             if (pte_fields.system) {
>> +                     if (umr_read_sram(start_addr, chunk_size, pdst) < 0)
>> {
>> +                             fprintf(stderr, "[ERROR] Cannot read system
>> ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
>> +                             fprintf(stderr, "[ERROR] Alternatively
>> download and install /dev/fmem\n");
>> +                             return -1;
>> +                     }
>> +             } else {
>> +                     if (umr_read_vram(asic, 0xFFFF, start_addr,
>> chunk_size, pdst) < 0) {
>> +                             fprintf(stderr, "[ERROR] Cannot read from
>> VRAM\n");
>> +                             return -1;
>> +                     }
>> +             }
>> +             pdst += chunk_size;
>> +             size -= chunk_size;
>> +             address += chunk_size;
>> +     }
>> +     return 0;
>> +}
>>
>>  int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address,
>> uint32_t size, void *dst)
>>  {
>> @@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t
>> vmid, uint64_t address, uint32
>>                return 0;
>>        }
>>
>> -     if (asic->family == FAMILY_VI)
>> -             return umr_read_vram_vi(asic, vmid, address, size, dst);
>> +     switch (asic->family) {
>> +             case FAMILY_VI:
>> +                     return umr_read_vram_vi(asic, vmid, address, size,
>> dst);
>> +             case FAMILY_AI:
>> +                     return umr_read_vram_ai(asic, vmid, address, size,
>> dst);
>> +             default:
>> +                     fprintf(stderr, "[BUG] Unsupported ASIC family type
>> for umr_read_vram()\n");
>> +                     return -1;
>> +     }
>>
>>        return 0;
>>  }
>> --
>> 2.12.0
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux