[PATCH 2/2] drm/amdgpu: enable huge page handling in the VM v4

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Ok, in this case let's commit it.

I've fixed the one liner regarding CPU based updates and send it to the 
list once more.

Please give me an rb or even tested-by on that version and then I'm 
going to commit it.

Thanks,
Christian.

Am 18.07.2017 um 20:56 schrieb Felix Kuehling:
> If you submit the code, it allows more people to experiment with it.
>
> Regards,
>    Felix
>
>
> On 17-07-18 09:54 AM, Christian König wrote:
>> Yeah, I mean it looks good from the software side but we still don't
>> see the hardware react as it should.
>>
>> It doesn't seem to hurt anything, so I'm torn apart between pushing it
>> and completely fixing it later on or wait till we have figured
>> everything out.
>>
>> Felix what is your opinion on that?
>>
>> Regards,
>> Christian.
>>
>> Am 18.07.2017 um 04:24 schrieb zhoucm1:
>>> Still holding on? I thought this patch was pushed in earlier with my RB.
>>>
>>> Regards,
>>> David Zhou
>>> On 2017å¹´07æ??18æ?¥ 05:02, Christian König wrote:
>>>> From: Christian König <christian.koenig at amd.com>
>>>>
>>>> The hardware can use huge pages to map 2MB of address space with
>>>> only one PDE.
>>>>
>>>> v2: few cleanups and rebased
>>>> v3: skip PT updates if we are using the PDE
>>>> v4: rebased, added support for CPU based updates
>>>>
>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 119
>>>> +++++++++++++++++++++++++++------
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   4 ++
>>>>    2 files changed, 103 insertions(+), 20 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index a3dbebe..62d97f5 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -351,6 +351,7 @@ static int amdgpu_vm_alloc_levels(struct
>>>> amdgpu_device *adev,
>>>>                  entry->bo = pt;
>>>>                entry->addr = 0;
>>>> +            entry->huge_page = false;
>>>>            }
>>>>              if (level < adev->vm_manager.num_level) {
>>>> @@ -1116,7 +1117,8 @@ static int amdgpu_vm_update_level(struct
>>>> amdgpu_device *adev,
>>>>              pt = amdgpu_bo_gpu_offset(bo);
>>>>            pt = amdgpu_gart_get_vm_pde(adev, pt);
>>>> -        if (parent->entries[pt_idx].addr == pt)
>>>> +        if (parent->entries[pt_idx].addr == pt ||
>>>> +            parent->entries[pt_idx].huge_page)
>>>>                continue;
>>>>              parent->entries[pt_idx].addr = pt;
>>>> @@ -1257,29 +1259,95 @@ int amdgpu_vm_update_directories(struct
>>>> amdgpu_device *adev,
>>>>    }
>>>>      /**
>>>> - * amdgpu_vm_find_pt - find the page table for an address
>>>> + * amdgpu_vm_find_entry - find the entry for an address
>>>>     *
>>>>     * @p: see amdgpu_pte_update_params definition
>>>>     * @addr: virtual address in question
>>>> + * @entry: resulting entry or NULL
>>>> + * @parent: parent entry
>>>>     *
>>>> - * Find the page table BO for a virtual address, return NULL when
>>>> none found.
>>>> + * Find the vm_pt entry and it's parent for the given address.
>>>>     */
>>>> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct
>>>> amdgpu_pte_update_params *p,
>>>> -                      uint64_t addr)
>>>> +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p,
>>>> uint64_t addr,
>>>> +             struct amdgpu_vm_pt **entry,
>>>> +             struct amdgpu_vm_pt **parent)
>>>>    {
>>>> -    struct amdgpu_vm_pt *entry = &p->vm->root;
>>>>        unsigned idx, level = p->adev->vm_manager.num_level;
>>>>    -    while (entry->entries) {
>>>> +    *parent = NULL;
>>>> +    *entry = &p->vm->root;
>>>> +    while ((*entry)->entries) {
>>>>            idx = addr >> (p->adev->vm_manager.block_size * level--);
>>>> -        idx %= amdgpu_bo_size(entry->bo) / 8;
>>>> -        entry = &entry->entries[idx];
>>>> +        idx %= amdgpu_bo_size((*entry)->bo) / 8;
>>>> +        *parent = *entry;
>>>> +        *entry = &(*entry)->entries[idx];
>>>>        }
>>>>          if (level)
>>>> -        return NULL;
>>>> +        *entry = NULL;
>>>> +}
>>>> +
>>>> +/**
>>>> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge
>>>> pages
>>>> + *
>>>> + * @p: see amdgpu_pte_update_params definition
>>>> + * @entry: vm_pt entry to check
>>>> + * @parent: parent entry
>>>> + * @nptes: number of PTEs updated with this operation
>>>> + * @dst: destination address where the PTEs should point to
>>>> + * @flags: access flags fro the PTEs
>>>> + *
>>>> + * Check if we can update the PD with a huge page.
>>>> + */
>>>> +static int amdgpu_vm_handle_huge_pages(struct
>>>> amdgpu_pte_update_params *p,
>>>> +                       struct amdgpu_vm_pt *entry,
>>>> +                       struct amdgpu_vm_pt *parent,
>>>> +                       unsigned nptes, uint64_t dst,
>>>> +                       uint64_t flags)
>>>> +{
>>>> +    bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
>>>> +    uint64_t pd_addr, pde;
>>>> +    int r;
>>>>    -    return entry->bo;
>>>> +    /* In the case of a mixed PT the PDE must point to it*/
>>>> +    if (p->adev->asic_type < CHIP_VEGA10 ||
>>>> +        nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
>>>> +        p->func != amdgpu_vm_do_set_ptes ||
>>>> +        !(flags & AMDGPU_PTE_VALID)) {
>>>> +
>>>> +        dst = amdgpu_bo_gpu_offset(entry->bo);
>>>> +        dst = amdgpu_gart_get_vm_pde(p->adev, dst);
>>>> +        flags = AMDGPU_PTE_VALID;
>>>> +    } else {
>>>> +        flags |= AMDGPU_PDE_PTE;
>>>> +    }
>>>> +
>>>> +    if (entry->addr == dst &&
>>>> +        entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
>>>> +        return 0;
>>>> +
>>>> +    entry->addr = dst;
>>>> +    entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
>>>> +
>>>> +    if (use_cpu_update) {
>>>> +        r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr);
>>>> +        if (r)
>>>> +            return r;
>>>> +
>>>> +        pde = pd_addr + (entry - parent->entries) * 8;
>>>> +        amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
>>>> +    } else {
>>>> +        if (parent->bo->shadow) {
>>>> +            pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
>>>> +            pde = pd_addr + (entry - parent->entries) * 8;
>>>> +            amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
>>>> +        }
>>>> +        pd_addr = amdgpu_bo_gpu_offset(parent->bo);
>>>> +        pde = pd_addr + (entry - parent->entries) * 8;
>>>> +        amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
>>>> +    }
>>>> +
>>>> +    return 0;
>>>>    }
>>>>      /**
>>>> @@ -1306,22 +1374,31 @@ static int amdgpu_vm_update_ptes(struct
>>>> amdgpu_pte_update_params *params,
>>>>        struct amdgpu_bo *pt;
>>>>        unsigned nptes;
>>>>        bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
>>>> -
>>>> +    int r;
>>>>          /* walk over the address space and update the page tables */
>>>> -    for (addr = start; addr < end; addr += nptes) {
>>>> -        pt = amdgpu_vm_get_pt(params, addr);
>>>> -        if (!pt) {
>>>> -            pr_err("PT not found, aborting update_ptes\n");
>>>> -            return -EINVAL;
>>>> -        }
>>>> +    for (addr = start; addr < end; addr += nptes,
>>>> +         dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
>>>> +        struct amdgpu_vm_pt *entry, *parent;
>>>> +
>>>> +        amdgpu_vm_get_entry(params, addr, &entry, &parent);
>>>> +        if (!entry)
>>>> +            return -ENOENT;
>>>>              if ((addr & ~mask) == (end & ~mask))
>>>>                nptes = end - addr;
>>>>            else
>>>>                nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>>>>    +        r = amdgpu_vm_handle_huge_pages(params, entry, parent,
>>>> +                        nptes, dst, flags);
>>>> +        if (r)
>>>> +            return r;
>>>>    +        if (entry->huge_page)
>>>> +            continue;
>>>> +
>>>> +        pt = entry->bo;
>>>>            if (use_cpu_update) {
>>>>                pe_start = (unsigned long)pt->kptr;
>>>>            } else {
>>>> @@ -1337,8 +1414,6 @@ static int amdgpu_vm_update_ptes(struct
>>>> amdgpu_pte_update_params *params,
>>>>            pe_start += (addr & mask) * 8;
>>>>            params->func(params, pe_start, dst, nptes,
>>>>                     AMDGPU_GPU_PAGE_SIZE, flags);
>>>> -
>>>> -        dst += nptes * AMDGPU_GPU_PAGE_SIZE;
>>>>        }
>>>>          return 0;
>>>> @@ -1490,6 +1565,9 @@ static int amdgpu_vm_bo_update_mapping(struct
>>>> amdgpu_device *adev,
>>>>        /* padding, etc. */
>>>>        ndw = 64;
>>>>    +    /* one PDE write for each huge page */
>>>> +    ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
>>>> +
>>>>        if (src) {
>>>>            /* only copy commands needed */
>>>>            ndw += ncmds * 7;
>>>> @@ -1569,6 +1647,7 @@ static int amdgpu_vm_bo_update_mapping(struct
>>>> amdgpu_device *adev,
>>>>      error_free:
>>>>        amdgpu_job_free(job);
>>>> +    amdgpu_vm_invalidate_level(&vm->root);
>>>>        return r;
>>>>    }
>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index c4f5d1f..34d9174 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
>>>>    /* TILED for VEGA10, reserved for older ASICs  */
>>>>    #define AMDGPU_PTE_PRT        (1ULL << 51)
>>>>    +/* PDE is handled as PTE for VEGA10 */
>>>> +#define AMDGPU_PDE_PTE        (1ULL << 54)
>>>> +
>>>>    /* VEGA10 only */
>>>>    #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>>>>    #define AMDGPU_PTE_MTYPE_MASK    AMDGPU_PTE_MTYPE(3ULL)
>>>> @@ -100,6 +103,7 @@ struct amdgpu_bo_list_entry;
>>>>    struct amdgpu_vm_pt {
>>>>        struct amdgpu_bo    *bo;
>>>>        uint64_t        addr;
>>>> +    bool            huge_page;
>>>>          /* array of page tables, one for each directory entry */
>>>>        struct amdgpu_vm_pt    *entries;
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux