[PATCH umr] Add initial AI VM decoding

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
> Of Tom St Denis
> Sent: Thursday, April 06, 2017 1:53 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: StDenis, Tom
> Subject: [PATCH umr] Add initial AI VM decoding
> 
> Tested with VMID0 decodings just fine.  Haven't tried VMID1-15 yet.
> 
> Signed-off-by: Tom St Denis <tom.stdenis at amd.com>

Looks reasonable:
Acked-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  src/lib/read_vram.c | 180
> ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 176 insertions(+), 4 deletions(-)
> 
> diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
> index e2087a252c10..4c74f4521857 100644
> --- a/src/lib/read_vram.c
> +++ b/src/lib/read_vram.c
> @@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t
> size, void *dst)
>  	return -1;
>  }
> 
> -
>  static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t
> address, uint32_t size, void *dst)
>  {
>  	uint64_t start_addr, page_table_start_addr, page_table_base_addr,
> @@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic,
> uint32_t vmid, uint64_t addre
>  		if (page_table_depth == 1) {
>  			// decode addr into pte and pde selectors...
>  			pde_idx = (address >> (12 + 9 + page_table_size)) &
> ((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
> -			pte_idx = (address >> 12) & ((1ULL << (9 +
> page_table_size)) - 1);
> +			pte_idx = (address >> (12 + page_table_size - 4)) &
> ((1ULL << (9 + page_table_size)) - 1);
> 
>  			// read PDE entry
>  			umr_read_vram(asic, 0xFFFF, page_table_base_addr
> + pde_idx * 8, 8, &pde_entry);
> @@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic,
> uint32_t vmid, uint64_t addre
>  	return 0;
>  }
> 
> +static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t
> address, uint32_t size, void *dst)
> +{
> +	uint64_t start_addr, page_table_start_addr, page_table_base_addr,
> +		 page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
> +		 pde_address;
> +	uint32_t chunk_size, tmp;
> +	int page_table_depth, first;
> +	struct {
> +		uint64_t
> +			frag_size,
> +			pte_base_addr,
> +			valid;
> +	} pde_fields;
> +	struct {
> +		uint64_t
> +			page_base_addr,
> +			fragment,
> +			system,
> +			valid;
> +	} pte_fields;
> +	char buf[64];
> +	unsigned char *pdst = dst;
> +
> +	/*
> +	 * PTE format on VI:
> +	 * 63:40 reserved
> +	 * 39:12 4k physical page base address
> +	 * 11:7 fragment
> +	 * 6 write
> +	 * 5 read
> +	 * 4 exe
> +	 * 3 reserved
> +	 * 2 snooped
> +	 * 1 system
> +	 * 0 valid
> +	 *
> +	 * PDE format on VI:
> +	 * 63:59 block fragment size
> +	 * 58:40 reserved
> +	 * 39:1 physical base address of PTE
> +	 * bits 5:1 must be 0.
> +	 * 0 valid
> +	 */
> +
> +	// read vm registers
> +	sprintf(buf,
> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
> +		page_table_start_addr =
> (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
> +	sprintf(buf,
> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
> +		page_table_start_addr |=
> (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
> +
> +	sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
> +		tmp = umr_read_reg_by_name(asic, buf);
> +		page_table_depth      = umr_bitslice_reg_by_name(asic, buf,
> "PAGE_TABLE_DEPTH", tmp);
> +		page_table_size       = umr_bitslice_reg_by_name(asic, buf,
> "PAGE_TABLE_BLOCK_SIZE", tmp);
> +
> +	sprintf(buf,
> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
> +		page_table_base_addr  =
> (uint64_t)umr_read_reg_by_name(asic, buf) << 0;
> +	sprintf(buf,
> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
> +		page_table_base_addr  |=
> (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
> +
> +	DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
> +	DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long
> long)page_table_start_addr);
> +	DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long
> long)page_table_base_addr);
> +	DEBUG("BASE_SIZE = %lu\n", page_table_size);
> +	DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth);
> +
> +	address -= page_table_start_addr;
> +
> +	// AI+ allows 0=default (4KB) whereas VI requires it to be explicitly set
> to >=4
> +	page_table_size = page_table_size ? page_table_size : 4;
> +
> +	first = 1;
> +	while (size) {
> +		if (page_table_depth >= 1) {
> +			// page_table_base_addr is not a PDE entry in this
> config so shift it out (it's a page address)
> +			page_table_base_addr <<= 12;
> +			pte_idx = (address >> (12 + page_table_size - 4)) &
> ((1ULL << (9 + page_table_size)) - 1);
> +
> +			// AI+ supports more than 1 level of PDEs so we
> iterate for all of the depths
> +			pde_address = address;
> +			while (page_table_depth) {
> +				// decode addr into pte and pde selectors...
> +				pde_idx = (pde_address >>
> (page_table_depth*9 + (12 + page_table_size - 4)));
> +
> +				// don't mask the first PDE idx
> +				if (!first)
> +					pde_idx &= (1ULL << 9) - 1;
> +				first = 0;
> +
> +				// read PDE entry
> +				umr_read_vram(asic, 0xFFFF,
> page_table_base_addr + pde_idx * 8, 8, &pde_entry);
> +
> +				// decode PDE values
> +				pde_fields.frag_size     = (pde_entry >> 59) &
> 0x1F;
> +				pde_fields.pte_base_addr = pde_entry &
> 0xFFFFFFFFF000ULL;
> +				pde_fields.valid         = pde_entry & 1;
> +				DEBUG("pde_idx=%llx, frag_size=%u,
> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
> (unsigned)pde_fields.frag_size, (unsigned long
> long)pde_fields.pte_base_addr, (int)pde_fields.valid);
> +
> +				// for the next round the address we're
> decoding is the phys address in the currently decoded PDE
> +				--page_table_depth;
> +				pde_address = pde_fields.pte_base_addr;
> +			}
> +
> +			// now read PTE entry for this page
> +			umr_read_vram(asic, 0xFFFF,
> pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry);
> +
> +			// decode PTE values
> +			pte_fields.page_base_addr = pte_entry &
> 0xFFFFFFFFF000ULL;
> +			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
> +			pte_fields.system         = (pte_entry >> 1) & 1;
> +			pte_fields.valid          = pte_entry & 1;
> +			DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
> (unsigned long long)pte_fields.page_base_addr,
> (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
> +
> +			// compute starting address
> +			start_addr = pte_fields.page_base_addr + (address
> & 0xFFF);
> +		} else {
> +			// in AI+ the BASE_ADDR is treated like a PDE entry...
> +			// decode PDE values
> +			pde_idx = 0; // unused
> +			pde_fields.frag_size     = (page_table_base_addr >>
> 59) & 0x1F;
> +			pde_fields.pte_base_addr = page_table_base_addr
> & 0xFFFFFFFFF000ULL;
> +			pde_fields.valid         = page_table_base_addr & 1;
> +			DEBUG("pde_idx=%llx, frag_size=%u,
> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
> (unsigned)pde_fields.frag_size, (unsigned long
> long)pde_fields.pte_base_addr, (int)pde_fields.valid);
> +
> +			// PTE addr = baseaddr[47:6] + ((logical - start) >>
> fragsize)
> +			pte_idx = (address >> (12 + pde_fields.frag_size));
> +
> +			umr_read_vram(asic, 0xFFFF,
> pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry);
> +
> +			// decode PTE values
> +			pte_fields.page_base_addr = pte_entry &
> 0xFFFFFFFF000ULL;
> +			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
> +			pte_fields.system         = (pte_entry >> 1) & 1;
> +			pte_fields.valid          = pte_entry & 1;
> +			DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
> (unsigned long long)pte_fields.page_base_addr,
> (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
> +
> +			// compute starting address
> +			start_addr = pte_fields.page_base_addr + (address
> & 0xFFF);
> +		}
> +
> +		// read up to 4K from it
> +		// TODO: Support page sizes >4KB
> +		if (((start_addr & 0xFFF) + size) & ~0xFFF) {
> +			chunk_size = 0x1000 - (start_addr & 0xFFF);
> +		} else {
> +			chunk_size = size;
> +		}
> +		DEBUG("Computed address we will read from: %s:%llx
> (reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram", (unsigned long
> long)start_addr, (unsigned long)chunk_size);
> +		if (pte_fields.system) {
> +			if (umr_read_sram(start_addr, chunk_size, pdst) < 0)
> {
> +				fprintf(stderr, "[ERROR] Cannot read system
> ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
> +				fprintf(stderr, "[ERROR] Alternatively
> download and install /dev/fmem\n");
> +				return -1;
> +			}
> +		} else {
> +			if (umr_read_vram(asic, 0xFFFF, start_addr,
> chunk_size, pdst) < 0) {
> +				fprintf(stderr, "[ERROR] Cannot read from
> VRAM\n");
> +				return -1;
> +			}
> +		}
> +		pdst += chunk_size;
> +		size -= chunk_size;
> +		address += chunk_size;
> +	}
> +	return 0;
> +}
> 
>  int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address,
> uint32_t size, void *dst)
>  {
> @@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t
> vmid, uint64_t address, uint32
>  		return 0;
>  	}
> 
> -	if (asic->family == FAMILY_VI)
> -		return umr_read_vram_vi(asic, vmid, address, size, dst);
> +	switch (asic->family) {
> +		case FAMILY_VI:
> +			return umr_read_vram_vi(asic, vmid, address, size,
> dst);
> +		case FAMILY_AI:
> +			return umr_read_vram_ai(asic, vmid, address, size,
> dst);
> +		default:
> +			fprintf(stderr, "[BUG] Unsupported ASIC family type
> for umr_read_vram()\n");
> +			return -1;
> +	}
> 
>  	return 0;
>  }
> --
> 2.12.0
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux