[PATCH 7/7] drm/amdgpu: enable huge page handling in the VM

david1.zhou@xxxxxxx (zhoucm1) · Wed, 24 May 2017 17:28:59 +0800



On 2017å¹´05æ??24æ?¥ 17:15, Christian KÃ¶nig wrote:
> Am 18.05.2017 um 07:30 schrieb zhoucm1:
>>
>>
>> On 2017å¹´05æ??17æ?¥ 17:22, Christian KÃ¶nig wrote:
>>> [SNIP]
>>> +    /* In the case of a mixed PT the PDE must point to it*/
>>> +    if (p->adev->asic_type < CHIP_VEGA10 ||
>>> +        nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
>>> +        p->func != amdgpu_vm_do_set_ptes ||
>>> +        !(flags & AMDGPU_PTE_VALID)) {
>>> +
>>> +        pt = (*bo)->tbo.mem.start << PAGE_SHIFT;
>>> +        pt = amdgpu_gart_get_vm_pde(p->adev, pt);
>>> +        flags = AMDGPU_PTE_VALID;
>> This case should be handled when updating levels, so return directly?
>
> The problem is that when we switch from huge page back to a normal 
> mapping because the BO was evicted we also need to reset the PDE.
>
>>> +    } else {
>>> +        pt = amdgpu_gart_get_vm_pde(p->adev, dst);
>>> +        flags |= AMDGPU_PDE_PTE;
>>> +    }
>>>   -    return entry->bo;
>>> +    if (entry->addr == pt &&
>>> +        entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
>>> +        return 0;
>>> +
>>> +    entry->addr = pt;
>>> +    entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
>>> +
>>> +    if (parent->bo->shadow) {
>>> +        pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
>>> +        pde = pd_addr + pt_idx * 8;
>>> +        amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
>> From the spec "any pde in the chain can itself take on the format of 
>> a PTE and point directly to an aligned block of logical address space 
>> by setting the P bit.",
>> So here should pass addr into PDE instead of pt.
>
> The pt variable was overwritten with the address a few lines above. 
> The code was indeed hard to understand, so I've fixed it.
this isn't my mean, if I understand spec correctly, it should be 
amdgpu_vm_do_set_ptes(p, pde, addr, 1, 0, flags);

>
>>> +    }
>>> +
>>> +    pd_addr = amdgpu_bo_gpu_offset(parent->bo);
>>> +    pde = pd_addr + pt_idx * 8;
>>> +    amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
>> Should pass addr into PDE instead of pt as well.
>>
>>
>>> +    return 0;
>>>   }
>>>     /**
>>> @@ -1194,14 +1237,20 @@ static int amdgpu_vm_update_ptes(struct 
>>> amdgpu_pte_update_params *params,
>>>       uint64_t addr, pe_start;
>>>       struct amdgpu_bo *pt;
>>>       unsigned nptes;
>>> +    int r;
>>>         /* walk over the address space and update the page tables */
>>>       for (addr = start; addr < end; addr += nptes) {
>>> -        pt = amdgpu_vm_get_pt(params, addr);
>>> -        if (!pt) {
>>> -            pr_err("PT not found, aborting update_ptes\n");
>>> -            return -EINVAL;
>>> -        }
>>> +
>>> +        if ((addr & ~mask) == (end & ~mask))
>>> +            nptes = end - addr;
>>> +        else
>>> +            nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>>> +
>>> +        r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
>>> +                        dst, flags, &pt);
>> If huge page happens, its sub PTEs don't need to update more, they 
>> cannot be indexed by page table when that PDE is PTE, right?
>
> Right, but I wasn't sure if we don't need them for something. So I've 
> kept the update for now.
>
> Going to try dropping this today.
>
>> Btw: Is this BigK which people often said?
>
> Yes and no. I can explain more internally if you like.
Welcome.

Regards,
David Zhou
>
> Regards,
> Christian.
>
>>
>> Regards,
>> David Zhou
>>> +        if (r)
>>> +            return r;
>>>             if (params->shadow) {
>>>               if (!pt->shadow)
>>> @@ -1209,11 +1258,6 @@ static int amdgpu_vm_update_ptes(struct 
>>> amdgpu_pte_update_params *params,
>>>               pt = pt->shadow;
>>>           }
>>>   -        if ((addr & ~mask) == (end & ~mask))
>>> -            nptes = end - addr;
>>> -        else
>>> -            nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>>> -
>>>           pe_start = amdgpu_bo_gpu_offset(pt);
>>>           pe_start += (addr & mask) * 8;
>>>   @@ -1353,6 +1397,9 @@ static int 
>>> amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>       /* padding, etc. */
>>>       ndw = 64;
>>>   +    /* one PDE write for each huge page */
>>> +    ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
>>> +
>>>       if (src) {
>>>           /* only copy commands needed */
>>>           ndw += ncmds * 7;
>>> @@ -1437,6 +1484,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
>>> amdgpu_device *adev,
>>>     error_free:
>>>       amdgpu_job_free(job);
>>> +    amdgpu_vm_invalidate_level(&vm->root);
>>>       return r;
>>>   }
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index afe9073..1c5e0ce 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -68,6 +68,9 @@ struct amdgpu_bo_list_entry;
>>>   /* TILED for VEGA10, reserved for older ASICs  */
>>>   #define AMDGPU_PTE_PRT        (1ULL << 51)
>>>   +/* PDE is handled as PTE for VEGA10 */
>>> +#define AMDGPU_PDE_PTE        (1ULL << 54)
>>> +
>>>   /* VEGA10 only */
>>>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>>>   #define AMDGPU_PTE_MTYPE_MASK    AMDGPU_PTE_MTYPE(3ULL)
>>> @@ -90,6 +93,7 @@ struct amdgpu_bo_list_entry;
>>>   struct amdgpu_vm_pt {
>>>       struct amdgpu_bo    *bo;
>>>       uint64_t        addr;
>>> +    bool            huge_page;
>>>         /* array of page tables, one for each directory entry */
>>>       struct amdgpu_vm_pt    *entries;
>>
>

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20170524/bd22237d/attachment-0001.html>