Ok, in this case let's commit it. I've fixed the one liner regarding CPU based updates and send it to the list once more. Please give me an rb or even tested-by on that version and then I'm going to commit it. Thanks, Christian. Am 18.07.2017 um 20:56 schrieb Felix Kuehling: > If you submit the code, it allows more people to experiment with it. > > Regards, > Felix > > > On 17-07-18 09:54 AM, Christian König wrote: >> Yeah, I mean it looks good from the software side but we still don't >> see the hardware react as it should. >> >> It doesn't seem to hurt anything, so I'm torn apart between pushing it >> and completely fixing it later on or wait till we have figured >> everything out. >> >> Felix what is your opinion on that? >> >> Regards, >> Christian. >> >> Am 18.07.2017 um 04:24 schrieb zhoucm1: >>> Still holding on? I thought this patch was pushed in earlier with my RB. >>> >>> Regards, >>> David Zhou >>> On 2017å¹´07æ??18æ?¥ 05:02, Christian König wrote: >>>> From: Christian König <christian.koenig at amd.com> >>>> >>>> The hardware can use huge pages to map 2MB of address space with >>>> only one PDE. >>>> >>>> v2: few cleanups and rebased >>>> v3: skip PT updates if we are using the PDE >>>> v4: rebased, added support for CPU based updates >>>> >>>> Signed-off-by: Christian König <christian.koenig at amd.com> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 119 >>>> +++++++++++++++++++++++++++------ >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++ >>>> 2 files changed, 103 insertions(+), 20 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> index a3dbebe..62d97f5 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> @@ -351,6 +351,7 @@ static int amdgpu_vm_alloc_levels(struct >>>> amdgpu_device *adev, >>>> entry->bo = pt; >>>> entry->addr = 0; >>>> + entry->huge_page = false; >>>> } >>>> if (level < adev->vm_manager.num_level) { >>>> @@ -1116,7 +1117,8 @@ static int amdgpu_vm_update_level(struct >>>> amdgpu_device *adev, >>>> pt = amdgpu_bo_gpu_offset(bo); >>>> pt = amdgpu_gart_get_vm_pde(adev, pt); >>>> - if (parent->entries[pt_idx].addr == pt) >>>> + if (parent->entries[pt_idx].addr == pt || >>>> + parent->entries[pt_idx].huge_page) >>>> continue; >>>> parent->entries[pt_idx].addr = pt; >>>> @@ -1257,29 +1259,95 @@ int amdgpu_vm_update_directories(struct >>>> amdgpu_device *adev, >>>> } >>>> /** >>>> - * amdgpu_vm_find_pt - find the page table for an address >>>> + * amdgpu_vm_find_entry - find the entry for an address >>>> * >>>> * @p: see amdgpu_pte_update_params definition >>>> * @addr: virtual address in question >>>> + * @entry: resulting entry or NULL >>>> + * @parent: parent entry >>>> * >>>> - * Find the page table BO for a virtual address, return NULL when >>>> none found. >>>> + * Find the vm_pt entry and it's parent for the given address. >>>> */ >>>> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct >>>> amdgpu_pte_update_params *p, >>>> - uint64_t addr) >>>> +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, >>>> uint64_t addr, >>>> + struct amdgpu_vm_pt **entry, >>>> + struct amdgpu_vm_pt **parent) >>>> { >>>> - struct amdgpu_vm_pt *entry = &p->vm->root; >>>> unsigned idx, level = p->adev->vm_manager.num_level; >>>> - while (entry->entries) { >>>> + *parent = NULL; >>>> + *entry = &p->vm->root; >>>> + while ((*entry)->entries) { >>>> idx = addr >> (p->adev->vm_manager.block_size * level--); >>>> - idx %= amdgpu_bo_size(entry->bo) / 8; >>>> - entry = &entry->entries[idx]; >>>> + idx %= amdgpu_bo_size((*entry)->bo) / 8; >>>> + *parent = *entry; >>>> + *entry = &(*entry)->entries[idx]; >>>> } >>>> if (level) >>>> - return NULL; >>>> + *entry = NULL; >>>> +} >>>> + >>>> +/** >>>> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge >>>> pages >>>> + * >>>> + * @p: see amdgpu_pte_update_params definition >>>> + * @entry: vm_pt entry to check >>>> + * @parent: parent entry >>>> + * @nptes: number of PTEs updated with this operation >>>> + * @dst: destination address where the PTEs should point to >>>> + * @flags: access flags fro the PTEs >>>> + * >>>> + * Check if we can update the PD with a huge page. >>>> + */ >>>> +static int amdgpu_vm_handle_huge_pages(struct >>>> amdgpu_pte_update_params *p, >>>> + struct amdgpu_vm_pt *entry, >>>> + struct amdgpu_vm_pt *parent, >>>> + unsigned nptes, uint64_t dst, >>>> + uint64_t flags) >>>> +{ >>>> + bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); >>>> + uint64_t pd_addr, pde; >>>> + int r; >>>> - return entry->bo; >>>> + /* In the case of a mixed PT the PDE must point to it*/ >>>> + if (p->adev->asic_type < CHIP_VEGA10 || >>>> + nptes != AMDGPU_VM_PTE_COUNT(p->adev) || >>>> + p->func != amdgpu_vm_do_set_ptes || >>>> + !(flags & AMDGPU_PTE_VALID)) { >>>> + >>>> + dst = amdgpu_bo_gpu_offset(entry->bo); >>>> + dst = amdgpu_gart_get_vm_pde(p->adev, dst); >>>> + flags = AMDGPU_PTE_VALID; >>>> + } else { >>>> + flags |= AMDGPU_PDE_PTE; >>>> + } >>>> + >>>> + if (entry->addr == dst && >>>> + entry->huge_page == !!(flags & AMDGPU_PDE_PTE)) >>>> + return 0; >>>> + >>>> + entry->addr = dst; >>>> + entry->huge_page = !!(flags & AMDGPU_PDE_PTE); >>>> + >>>> + if (use_cpu_update) { >>>> + r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr); >>>> + if (r) >>>> + return r; >>>> + >>>> + pde = pd_addr + (entry - parent->entries) * 8; >>>> + amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); >>>> + } else { >>>> + if (parent->bo->shadow) { >>>> + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); >>>> + pde = pd_addr + (entry - parent->entries) * 8; >>>> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); >>>> + } >>>> + pd_addr = amdgpu_bo_gpu_offset(parent->bo); >>>> + pde = pd_addr + (entry - parent->entries) * 8; >>>> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); >>>> + } >>>> + >>>> + return 0; >>>> } >>>> /** >>>> @@ -1306,22 +1374,31 @@ static int amdgpu_vm_update_ptes(struct >>>> amdgpu_pte_update_params *params, >>>> struct amdgpu_bo *pt; >>>> unsigned nptes; >>>> bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); >>>> - >>>> + int r; >>>> /* walk over the address space and update the page tables */ >>>> - for (addr = start; addr < end; addr += nptes) { >>>> - pt = amdgpu_vm_get_pt(params, addr); >>>> - if (!pt) { >>>> - pr_err("PT not found, aborting update_ptes\n"); >>>> - return -EINVAL; >>>> - } >>>> + for (addr = start; addr < end; addr += nptes, >>>> + dst += nptes * AMDGPU_GPU_PAGE_SIZE) { >>>> + struct amdgpu_vm_pt *entry, *parent; >>>> + >>>> + amdgpu_vm_get_entry(params, addr, &entry, &parent); >>>> + if (!entry) >>>> + return -ENOENT; >>>> if ((addr & ~mask) == (end & ~mask)) >>>> nptes = end - addr; >>>> else >>>> nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); >>>> + r = amdgpu_vm_handle_huge_pages(params, entry, parent, >>>> + nptes, dst, flags); >>>> + if (r) >>>> + return r; >>>> + if (entry->huge_page) >>>> + continue; >>>> + >>>> + pt = entry->bo; >>>> if (use_cpu_update) { >>>> pe_start = (unsigned long)pt->kptr; >>>> } else { >>>> @@ -1337,8 +1414,6 @@ static int amdgpu_vm_update_ptes(struct >>>> amdgpu_pte_update_params *params, >>>> pe_start += (addr & mask) * 8; >>>> params->func(params, pe_start, dst, nptes, >>>> AMDGPU_GPU_PAGE_SIZE, flags); >>>> - >>>> - dst += nptes * AMDGPU_GPU_PAGE_SIZE; >>>> } >>>> return 0; >>>> @@ -1490,6 +1565,9 @@ static int amdgpu_vm_bo_update_mapping(struct >>>> amdgpu_device *adev, >>>> /* padding, etc. */ >>>> ndw = 64; >>>> + /* one PDE write for each huge page */ >>>> + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7; >>>> + >>>> if (src) { >>>> /* only copy commands needed */ >>>> ndw += ncmds * 7; >>>> @@ -1569,6 +1647,7 @@ static int amdgpu_vm_bo_update_mapping(struct >>>> amdgpu_device *adev, >>>> error_free: >>>> amdgpu_job_free(job); >>>> + amdgpu_vm_invalidate_level(&vm->root); >>>> return r; >>>> } >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> index c4f5d1f..34d9174 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry; >>>> /* TILED for VEGA10, reserved for older ASICs */ >>>> #define AMDGPU_PTE_PRT (1ULL << 51) >>>> +/* PDE is handled as PTE for VEGA10 */ >>>> +#define AMDGPU_PDE_PTE (1ULL << 54) >>>> + >>>> /* VEGA10 only */ >>>> #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) >>>> #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) >>>> @@ -100,6 +103,7 @@ struct amdgpu_bo_list_entry; >>>> struct amdgpu_vm_pt { >>>> struct amdgpu_bo *bo; >>>> uint64_t addr; >>>> + bool huge_page; >>>> /* array of page tables, one for each directory entry */ >>>> struct amdgpu_vm_pt *entries; >>> _______________________________________________ >>> amd-gfx mailing list >>> amd-gfx at lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >> > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx