If you submit the code, it allows more people to experiment with it. Regards, Felix On 17-07-18 09:54 AM, Christian König wrote: > Yeah, I mean it looks good from the software side but we still don't > see the hardware react as it should. > > It doesn't seem to hurt anything, so I'm torn apart between pushing it > and completely fixing it later on or wait till we have figured > everything out. > > Felix what is your opinion on that? > > Regards, > Christian. > > Am 18.07.2017 um 04:24 schrieb zhoucm1: >> Still holding on? I thought this patch was pushed in earlier with my RB. >> >> Regards, >> David Zhou >> On 2017å¹´07æ??18æ?¥ 05:02, Christian König wrote: >>> From: Christian König <christian.koenig at amd.com> >>> >>> The hardware can use huge pages to map 2MB of address space with >>> only one PDE. >>> >>> v2: few cleanups and rebased >>> v3: skip PT updates if we are using the PDE >>> v4: rebased, added support for CPU based updates >>> >>> Signed-off-by: Christian König <christian.koenig at amd.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 119 >>> +++++++++++++++++++++++++++------ >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++ >>> 2 files changed, 103 insertions(+), 20 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> index a3dbebe..62d97f5 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> @@ -351,6 +351,7 @@ static int amdgpu_vm_alloc_levels(struct >>> amdgpu_device *adev, >>> entry->bo = pt; >>> entry->addr = 0; >>> + entry->huge_page = false; >>> } >>> if (level < adev->vm_manager.num_level) { >>> @@ -1116,7 +1117,8 @@ static int amdgpu_vm_update_level(struct >>> amdgpu_device *adev, >>> pt = amdgpu_bo_gpu_offset(bo); >>> pt = amdgpu_gart_get_vm_pde(adev, pt); >>> - if (parent->entries[pt_idx].addr == pt) >>> + if (parent->entries[pt_idx].addr == pt || >>> + parent->entries[pt_idx].huge_page) >>> continue; >>> parent->entries[pt_idx].addr = pt; >>> @@ -1257,29 +1259,95 @@ int amdgpu_vm_update_directories(struct >>> amdgpu_device *adev, >>> } >>> /** >>> - * amdgpu_vm_find_pt - find the page table for an address >>> + * amdgpu_vm_find_entry - find the entry for an address >>> * >>> * @p: see amdgpu_pte_update_params definition >>> * @addr: virtual address in question >>> + * @entry: resulting entry or NULL >>> + * @parent: parent entry >>> * >>> - * Find the page table BO for a virtual address, return NULL when >>> none found. >>> + * Find the vm_pt entry and it's parent for the given address. >>> */ >>> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct >>> amdgpu_pte_update_params *p, >>> - uint64_t addr) >>> +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, >>> uint64_t addr, >>> + struct amdgpu_vm_pt **entry, >>> + struct amdgpu_vm_pt **parent) >>> { >>> - struct amdgpu_vm_pt *entry = &p->vm->root; >>> unsigned idx, level = p->adev->vm_manager.num_level; >>> - while (entry->entries) { >>> + *parent = NULL; >>> + *entry = &p->vm->root; >>> + while ((*entry)->entries) { >>> idx = addr >> (p->adev->vm_manager.block_size * level--); >>> - idx %= amdgpu_bo_size(entry->bo) / 8; >>> - entry = &entry->entries[idx]; >>> + idx %= amdgpu_bo_size((*entry)->bo) / 8; >>> + *parent = *entry; >>> + *entry = &(*entry)->entries[idx]; >>> } >>> if (level) >>> - return NULL; >>> + *entry = NULL; >>> +} >>> + >>> +/** >>> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge >>> pages >>> + * >>> + * @p: see amdgpu_pte_update_params definition >>> + * @entry: vm_pt entry to check >>> + * @parent: parent entry >>> + * @nptes: number of PTEs updated with this operation >>> + * @dst: destination address where the PTEs should point to >>> + * @flags: access flags fro the PTEs >>> + * >>> + * Check if we can update the PD with a huge page. >>> + */ >>> +static int amdgpu_vm_handle_huge_pages(struct >>> amdgpu_pte_update_params *p, >>> + struct amdgpu_vm_pt *entry, >>> + struct amdgpu_vm_pt *parent, >>> + unsigned nptes, uint64_t dst, >>> + uint64_t flags) >>> +{ >>> + bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); >>> + uint64_t pd_addr, pde; >>> + int r; >>> - return entry->bo; >>> + /* In the case of a mixed PT the PDE must point to it*/ >>> + if (p->adev->asic_type < CHIP_VEGA10 || >>> + nptes != AMDGPU_VM_PTE_COUNT(p->adev) || >>> + p->func != amdgpu_vm_do_set_ptes || >>> + !(flags & AMDGPU_PTE_VALID)) { >>> + >>> + dst = amdgpu_bo_gpu_offset(entry->bo); >>> + dst = amdgpu_gart_get_vm_pde(p->adev, dst); >>> + flags = AMDGPU_PTE_VALID; >>> + } else { >>> + flags |= AMDGPU_PDE_PTE; >>> + } >>> + >>> + if (entry->addr == dst && >>> + entry->huge_page == !!(flags & AMDGPU_PDE_PTE)) >>> + return 0; >>> + >>> + entry->addr = dst; >>> + entry->huge_page = !!(flags & AMDGPU_PDE_PTE); >>> + >>> + if (use_cpu_update) { >>> + r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr); >>> + if (r) >>> + return r; >>> + >>> + pde = pd_addr + (entry - parent->entries) * 8; >>> + amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); >>> + } else { >>> + if (parent->bo->shadow) { >>> + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); >>> + pde = pd_addr + (entry - parent->entries) * 8; >>> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); >>> + } >>> + pd_addr = amdgpu_bo_gpu_offset(parent->bo); >>> + pde = pd_addr + (entry - parent->entries) * 8; >>> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); >>> + } >>> + >>> + return 0; >>> } >>> /** >>> @@ -1306,22 +1374,31 @@ static int amdgpu_vm_update_ptes(struct >>> amdgpu_pte_update_params *params, >>> struct amdgpu_bo *pt; >>> unsigned nptes; >>> bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); >>> - >>> + int r; >>> /* walk over the address space and update the page tables */ >>> - for (addr = start; addr < end; addr += nptes) { >>> - pt = amdgpu_vm_get_pt(params, addr); >>> - if (!pt) { >>> - pr_err("PT not found, aborting update_ptes\n"); >>> - return -EINVAL; >>> - } >>> + for (addr = start; addr < end; addr += nptes, >>> + dst += nptes * AMDGPU_GPU_PAGE_SIZE) { >>> + struct amdgpu_vm_pt *entry, *parent; >>> + >>> + amdgpu_vm_get_entry(params, addr, &entry, &parent); >>> + if (!entry) >>> + return -ENOENT; >>> if ((addr & ~mask) == (end & ~mask)) >>> nptes = end - addr; >>> else >>> nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); >>> + r = amdgpu_vm_handle_huge_pages(params, entry, parent, >>> + nptes, dst, flags); >>> + if (r) >>> + return r; >>> + if (entry->huge_page) >>> + continue; >>> + >>> + pt = entry->bo; >>> if (use_cpu_update) { >>> pe_start = (unsigned long)pt->kptr; >>> } else { >>> @@ -1337,8 +1414,6 @@ static int amdgpu_vm_update_ptes(struct >>> amdgpu_pte_update_params *params, >>> pe_start += (addr & mask) * 8; >>> params->func(params, pe_start, dst, nptes, >>> AMDGPU_GPU_PAGE_SIZE, flags); >>> - >>> - dst += nptes * AMDGPU_GPU_PAGE_SIZE; >>> } >>> return 0; >>> @@ -1490,6 +1565,9 @@ static int amdgpu_vm_bo_update_mapping(struct >>> amdgpu_device *adev, >>> /* padding, etc. */ >>> ndw = 64; >>> + /* one PDE write for each huge page */ >>> + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7; >>> + >>> if (src) { >>> /* only copy commands needed */ >>> ndw += ncmds * 7; >>> @@ -1569,6 +1647,7 @@ static int amdgpu_vm_bo_update_mapping(struct >>> amdgpu_device *adev, >>> error_free: >>> amdgpu_job_free(job); >>> + amdgpu_vm_invalidate_level(&vm->root); >>> return r; >>> } >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>> index c4f5d1f..34d9174 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry; >>> /* TILED for VEGA10, reserved for older ASICs */ >>> #define AMDGPU_PTE_PRT (1ULL << 51) >>> +/* PDE is handled as PTE for VEGA10 */ >>> +#define AMDGPU_PDE_PTE (1ULL << 54) >>> + >>> /* VEGA10 only */ >>> #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) >>> #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) >>> @@ -100,6 +103,7 @@ struct amdgpu_bo_list_entry; >>> struct amdgpu_vm_pt { >>> struct amdgpu_bo *bo; >>> uint64_t addr; >>> + bool huge_page; >>> /* array of page tables, one for each directory entry */ >>> struct amdgpu_vm_pt *entries; >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > >