Sorry for the delayed reply, just had to fish this mail out of my spam
folder.

> Do you need something in amdgpu_vm_update_level to stop it from
> overwriting huge page PTEs?

Yes, this is exactly what this chunk added to amdgpu_vm_update_level is
doing:

> -        if (parent->entries[pt_idx].addr == pt)
> +        if (parent->entries[pt_idx].addr == pt ||
> +            parent->entries[pt_idx].huge_page)
>                  continue;

Regards,
Christian.

On 23.05.2017 at 21:29, Felix Kuehling wrote:
> Do you need something in amdgpu_vm_update_level to stop it from
> overwriting huge page PTEs?
>
> Regards,
>    Felix
>
>
> On 17-05-23 12:52 PM, Christian König wrote:
>> From: Christian König <christian.koenig at amd.com>
>>
>> The hardware can use huge pages to map 2MB of address space with only one PDE.
>>
>> v2: few cleanups and rebased
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 95 +++++++++++++++++++++++++---------
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++
>>  2 files changed, 75 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index f7afdfa..f07c9b8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>
>>                  entry->bo = pt;
>>                  entry->addr = 0;
>> +                entry->huge_page = false;
>>          }
>>
>>          if (level < adev->vm_manager.num_level) {
>> @@ -1013,7 +1014,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>>
>>                  pt = amdgpu_bo_gpu_offset(bo);
>>                  pt = amdgpu_gart_get_vm_pde(adev, pt);
>> -                if (parent->entries[pt_idx].addr == pt)
>> +                if (parent->entries[pt_idx].addr == pt ||
>> +                    parent->entries[pt_idx].huge_page)
>>                          continue;
>>
>>                  parent->entries[pt_idx].addr = pt;
>> @@ -1145,29 +1147,69 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>  }
>>
>>  /**
>> - * amdgpu_vm_find_pt - find the page table for an address
>> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
>>   *
>>   * @p: see amdgpu_pte_update_params definition
>>   * @addr: virtual address in question
>> + * @nptes: number of PTEs updated with this operation
>> + * @dst: destination address where the PTEs should point to
>> + * @flags: access flags for the PTEs
>> + * @bo: resulting page tables BO
>>   *
>> - * Find the page table BO for a virtual address, return NULL when none found.
>> + * Check if we can update the PD with a huge page. Also finds the page table
>> + * BO for a virtual address, returns -ENOENT when nothing found.
>>   */
>> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
>> -                                          uint64_t addr)
>> +static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>> +                                       uint64_t addr, unsigned nptes,
>> +                                       uint64_t dst, uint64_t flags,
>> +                                       struct amdgpu_bo **bo)
>>  {
>> -        struct amdgpu_vm_pt *entry = &p->vm->root;
>> -        unsigned idx, level = p->adev->vm_manager.num_level;
>> +        unsigned pt_idx, level = p->adev->vm_manager.num_level;
>> +        struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
>> +        uint64_t pd_addr, pde;
>>
>> -        while (entry->entries) {
>> -                idx = addr >> (p->adev->vm_manager.block_size * level--);
>> -                idx %= amdgpu_bo_size(entry->bo) / 8;
>> -                entry = &entry->entries[idx];
>> -        }
>> +        do {
>> +                pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
>> +                pt_idx %= amdgpu_bo_size(entry->bo) / 8;
>> +                parent = entry;
>> +                entry = &entry->entries[pt_idx];
>> +        } while (entry->entries);
>>
>>          if (level)
>> -                return NULL;
>> +                return -ENOENT;
>> +
>> +        *bo = entry->bo;
>> +
>> +        /* In the case of a mixed PT the PDE must point to it */
>> +        if (p->adev->asic_type < CHIP_VEGA10 ||
>> +            nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
>> +            p->func != amdgpu_vm_do_set_ptes ||
>> +            !(flags & AMDGPU_PTE_VALID)) {
>> +
>> +                dst = amdgpu_bo_gpu_offset(*bo);
>> +                dst = amdgpu_gart_get_vm_pde(p->adev, dst);
>> +                flags = AMDGPU_PTE_VALID;
>> +        } else {
>> +                flags |= AMDGPU_PDE_PTE;
>> +        }
>>
>> -        return entry->bo;
>> +        if (entry->addr == dst &&
>> +            entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
>> +                return 0;
>> +
>> +        entry->addr = dst;
>> +        entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
>> +
>> +        if (parent->bo->shadow) {
>> +                pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
>> +                pde = pd_addr + pt_idx * 8;
>> +                amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
>> +        }
>> +
>> +        pd_addr = amdgpu_bo_gpu_offset(parent->bo);
>> +        pde = pd_addr + pt_idx * 8;
>> +        amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
>> +        return 0;
>>  }
>>
>>  /**
>> @@ -1193,14 +1235,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>          uint64_t addr, pe_start;
>>          struct amdgpu_bo *pt;
>>          unsigned nptes;
>> +        int r;
>>
>>          /* walk over the address space and update the page tables */
>>          for (addr = start; addr < end; addr += nptes) {
>> -                pt = amdgpu_vm_get_pt(params, addr);
>> -                if (!pt) {
>> -                        pr_err("PT not found, aborting update_ptes\n");
>> -                        return -EINVAL;
>> -                }
>> +
>> +                if ((addr & ~mask) == (end & ~mask))
>> +                        nptes = end - addr;
>> +                else
>> +                        nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>> +
>> +                r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
>> +                                                dst, flags, &pt);
>> +                if (r)
>> +                        return r;
>>
>>                  if (params->shadow) {
>>                          if (!pt->shadow)
>> @@ -1208,11 +1256,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>                          pt = pt->shadow;
>>                  }
>>
>> -                if ((addr & ~mask) == (end & ~mask))
>> -                        nptes = end - addr;
>> -                else
>> -                        nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>> -
>>                  pe_start = amdgpu_bo_gpu_offset(pt);
>>                  pe_start += (addr & mask) * 8;
>>
>> @@ -1353,6 +1396,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>          /* padding, etc. */
>>          ndw = 64;
>>
>> +        /* one PDE write for each huge page */
>> +        ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
>> +
>>          if (src) {
>>                  /* only copy commands needed */
>>                  ndw += ncmds * 7;
>> @@ -1437,6 +1483,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>
>>  error_free:
>>          amdgpu_job_free(job);
>> +        amdgpu_vm_invalidate_level(&vm->root);
>>          return r;
>>  }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 0f83fe3..2940d66 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
>>  /* TILED for VEGA10, reserved for older ASICs */
>>  #define AMDGPU_PTE_PRT (1ULL << 51)
>>
>> +/* PDE is handled as PTE for VEGA10 */
>> +#define AMDGPU_PDE_PTE (1ULL << 54)
>> +
>>  /* VEGA10 only */
>>  #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
>>  #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
>> @@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
>>  struct amdgpu_vm_pt {
>>          struct amdgpu_bo *bo;
>>          uint64_t addr;
>> +        bool huge_page;
>>
>>          /* array of page tables, one for each directory entry */
>>          struct amdgpu_vm_pt *entries;
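
[Editor's note] For readers skimming the thread, here is a minimal stand-alone sketch (not part of the patch; user-space C with assumed constants) of the arithmetic behind the huge-page path: assuming the default 9-bit block size, a page table holds 512 PTEs, so one PDE marked with AMDGPU_PDE_PTE covers 512 * 4KB = 2MB, and a fragment only qualifies when it spans such an aligned run, mirroring the nptes check in amdgpu_vm_handle_huge_pages above.

/*
 * Hypothetical, stand-alone sketch -- not part of the patch.  Illustrates
 * why one PDE marked with AMDGPU_PDE_PTE covers 2MB, assuming the default
 * 9-bit block size (512 PTEs per page table) and 4KB GPU pages.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE    9u                  /* assumed vm_manager.block_size */
#define PTE_COUNT     (1u << BLOCK_SIZE)  /* 512 PTEs per page table */
#define GPU_PAGE_SIZE 4096u               /* 4KB GPU pages */

/*
 * A fragment can be mapped by a single huge-page PDE only when it spans a
 * whole, naturally aligned page table worth of PTEs (this mirrors the
 * "nptes != AMDGPU_VM_PTE_COUNT" test in amdgpu_vm_handle_huge_pages).
 */
static bool fits_huge_pde(uint64_t start_pfn, unsigned nptes)
{
        return nptes == PTE_COUNT && (start_pfn & (PTE_COUNT - 1)) == 0;
}

int main(void)
{
        printf("one PDE maps %u bytes\n", PTE_COUNT * GPU_PAGE_SIZE);    /* 2097152 = 2MB */
        printf("aligned 512-page run:   %d\n", fits_huge_pde(512, 512)); /* 1 */
        printf("unaligned 512-page run: %d\n", fits_huge_pde(300, 512)); /* 0 */
        return 0;
}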