From: Christian König <christian.koenig@xxxxxxx>

The hardware can use huge pages to map 2MB of address space with only
one PDE.

v2: few cleanups and rebased

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 95 +++++++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++
 2 files changed, 75 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f7afdfa..f07c9b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 		entry->bo = pt;
 		entry->addr = 0;
+		entry->huge_page = false;
 	}
 
 	if (level < adev->vm_manager.num_level) {
@@ -1013,7 +1014,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		if (parent->entries[pt_idx].addr == pt)
+		if (parent->entries[pt_idx].addr == pt ||
+		    parent->entries[pt_idx].huge_page)
 			continue;
 
 		parent->entries[pt_idx].addr = pt;
@@ -1145,29 +1147,69 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_find_pt - find the page table for an address
+ * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
  *
  * @p: see amdgpu_pte_update_params definition
  * @addr: virtual address in question
+ * @nptes: number of PTEs updated with this operation
+ * @dst: destination address where the PTEs should point to
+ * @flags: access flags for the PTEs
+ * @bo: resulting page table BO
  *
- * Find the page table BO for a virtual address, return NULL when none found.
+ * Check if we can update the PD with a huge page. Also finds the page table
+ * BO for a virtual address; returns -ENOENT when none is found.
  */
-static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
-					  uint64_t addr)
+static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+				       uint64_t addr, unsigned nptes,
+				       uint64_t dst, uint64_t flags,
+				       struct amdgpu_bo **bo)
 {
-	struct amdgpu_vm_pt *entry = &p->vm->root;
-	unsigned idx, level = p->adev->vm_manager.num_level;
+	unsigned pt_idx, level = p->adev->vm_manager.num_level;
+	struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
+	uint64_t pd_addr, pde;
 
-	while (entry->entries) {
-		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size(entry->bo) / 8;
-		entry = &entry->entries[idx];
-	}
+	do {
+		pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
+		pt_idx %= amdgpu_bo_size(entry->bo) / 8;
+		parent = entry;
+		entry = &entry->entries[pt_idx];
+	} while (entry->entries);
 
 	if (level)
-		return NULL;
+		return -ENOENT;
+
+	*bo = entry->bo;
+
+	/* In the case of a mixed PT the PDE must point to it */
+	if (p->adev->asic_type < CHIP_VEGA10 ||
+	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
+	    p->func != amdgpu_vm_do_set_ptes ||
+	    !(flags & AMDGPU_PTE_VALID)) {
+
+		dst = amdgpu_bo_gpu_offset(*bo);
+		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
+		flags = AMDGPU_PTE_VALID;
+	} else {
+		flags |= AMDGPU_PDE_PTE;
+	}
 
-	return entry->bo;
+	if (entry->addr == dst &&
+	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
+		return 0;
+
+	entry->addr = dst;
+	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+
+	if (parent->bo->shadow) {
+		pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+		pde = pd_addr + pt_idx * 8;
+		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+	}
+
+	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+	pde = pd_addr + pt_idx * 8;
+	amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+	return 0;
 }
 
 /**
@@ -1193,14 +1235,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
 	unsigned nptes;
+	int r;
 
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; addr += nptes) {
-		pt = amdgpu_vm_get_pt(params, addr);
-		if (!pt) {
-			pr_err("PT not found, aborting update_ptes\n");
-			return -EINVAL;
-		}
+
+		if ((addr & ~mask) == (end & ~mask))
+			nptes = end - addr;
+		else
+			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
+
+		r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
+						dst, flags, &pt);
+		if (r)
+			return r;
 
 		if (params->shadow) {
 			if (!pt->shadow)
@@ -1208,11 +1256,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			pt = pt->shadow;
 		}
 
-		if ((addr & ~mask) == (end & ~mask))
-			nptes = end - addr;
-		else
-			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
 		pe_start = amdgpu_bo_gpu_offset(pt);
 		pe_start += (addr & mask) * 8;
 
@@ -1353,6 +1396,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
+	/* one PDE write for each huge page */
+	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
+
 	if (src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
@@ -1437,6 +1483,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
+	amdgpu_vm_invalidate_level(&vm->root);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 0f83fe3..2940d66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
 /* TILED for VEGA10, reserved for older ASICs */
 #define AMDGPU_PTE_PRT	(1ULL << 51)
 
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE	(1ULL << 54)
+
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)	((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
@@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
 struct amdgpu_vm_pt {
 	struct amdgpu_bo	*bo;
 	uint64_t		addr;
+	bool			huge_page;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt	*entries;
-- 
2.7.4
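
P.S. For reviewers who want the arithmetic behind "2MB of address space
with only one PDE", here is a minimal standalone sketch. The 9-bit block
size and 4KB page size are assumptions matching the driver defaults,
not values read from this diff:

/* huge_page_math.c -- illustrative only; constants are assumptions,
 * not taken from the patch. Shows that one PDE carrying AMDGPU_PDE_PTE
 * covers a full page table worth of PTEs: 512 * 4KB = 2MB.
 */
#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE	9			/* assumed vm_manager.block_size */
#define PTE_COUNT	(1ULL << BLOCK_SIZE)	/* PTEs per page table: 512 */
#define GPU_PAGE_SIZE	4096ULL			/* assumed 4KB GPU pages */

int main(void)
{
	uint64_t span = PTE_COUNT * GPU_PAGE_SIZE;

	/* one PDE pointing at a 2MB physical page replaces 512 PTE writes */
	printf("one PDE spans %llu bytes (%llu MB)\n",
	       (unsigned long long)span,
	       (unsigned long long)(span >> 20));
	return 0;
}

This is also why amdgpu_vm_handle_huge_pages() only sets AMDGPU_PDE_PTE
when nptes == AMDGPU_VM_PTE_COUNT(p->adev): the huge-page PDE is only
valid when the update covers the entire page table it replaces,
otherwise the PDE must keep pointing at the mixed page table.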