Am 18.05.2017 um 07:30 schrieb zhoucm1: > > > On 2017å¹´05æ??17æ?¥ 17:22, Christian König wrote: >> [SNIP] >> + /* In the case of a mixed PT the PDE must point to it*/ >> + if (p->adev->asic_type < CHIP_VEGA10 || >> + nptes != AMDGPU_VM_PTE_COUNT(p->adev) || >> + p->func != amdgpu_vm_do_set_ptes || >> + !(flags & AMDGPU_PTE_VALID)) { >> + >> + pt = (*bo)->tbo.mem.start << PAGE_SHIFT; >> + pt = amdgpu_gart_get_vm_pde(p->adev, pt); >> + flags = AMDGPU_PTE_VALID; > This case should be handled when updating levels, so return directly? The problem is that when we switch from huge page back to a normal mapping because the BO was evicted we also need to reset the PDE. >> + } else { >> + pt = amdgpu_gart_get_vm_pde(p->adev, dst); >> + flags |= AMDGPU_PDE_PTE; >> + } >> - return entry->bo; >> + if (entry->addr == pt && >> + entry->huge_page == !!(flags & AMDGPU_PDE_PTE)) >> + return 0; >> + >> + entry->addr = pt; >> + entry->huge_page = !!(flags & AMDGPU_PDE_PTE); >> + >> + if (parent->bo->shadow) { >> + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); >> + pde = pd_addr + pt_idx * 8; >> + amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags); > From the spec "any pde in the chain can itself take on the format of a > PTE and point directly to an aligned block of logical address space by > setting the P bit.", > So here should pass addr into PDE instead of pt. The pt variable was overwritten with the address a few lines above. The code was indeed hard to understand, so I've fixed it. >> + } >> + >> + pd_addr = amdgpu_bo_gpu_offset(parent->bo); >> + pde = pd_addr + pt_idx * 8; >> + amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags); > Should pass addr into PDE instead of pt as well. > > >> + return 0; >> } >> /** >> @@ -1194,14 +1237,20 @@ static int amdgpu_vm_update_ptes(struct >> amdgpu_pte_update_params *params, >> uint64_t addr, pe_start; >> struct amdgpu_bo *pt; >> unsigned nptes; >> + int r; >> /* walk over the address space and update the page tables */ >> for (addr = start; addr < end; addr += nptes) { >> - pt = amdgpu_vm_get_pt(params, addr); >> - if (!pt) { >> - pr_err("PT not found, aborting update_ptes\n"); >> - return -EINVAL; >> - } >> + >> + if ((addr & ~mask) == (end & ~mask)) >> + nptes = end - addr; >> + else >> + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); >> + >> + r = amdgpu_vm_handle_huge_pages(params, addr, nptes, >> + dst, flags, &pt); > If huge page happens, its sub PTEs don't need to update more, they > cannot be indexed by page table when that PDE is PTE, right? Right, but I wasn't sure if we don't need them for something. So I've kept the update for now. Going to try dropping this today. > Btw: Is this BigK which people often said? Yes and no. I can explain more internally if you like. Regards, Christian. > > Regards, > David Zhou >> + if (r) >> + return r; >> if (params->shadow) { >> if (!pt->shadow) >> @@ -1209,11 +1258,6 @@ static int amdgpu_vm_update_ptes(struct >> amdgpu_pte_update_params *params, >> pt = pt->shadow; >> } >> - if ((addr & ~mask) == (end & ~mask)) >> - nptes = end - addr; >> - else >> - nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); >> - >> pe_start = amdgpu_bo_gpu_offset(pt); >> pe_start += (addr & mask) * 8; >> @@ -1353,6 +1397,9 @@ static int amdgpu_vm_bo_update_mapping(struct >> amdgpu_device *adev, >> /* padding, etc. */ >> ndw = 64; >> + /* one PDE write for each huge page */ >> + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7; >> + >> if (src) { >> /* only copy commands needed */ >> ndw += ncmds * 7; >> @@ -1437,6 +1484,7 @@ static int amdgpu_vm_bo_update_mapping(struct >> amdgpu_device *adev, >> error_free: >> amdgpu_job_free(job); >> + amdgpu_vm_invalidate_level(&vm->root); >> return r; >> } >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> index afe9073..1c5e0ce 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> @@ -68,6 +68,9 @@ struct amdgpu_bo_list_entry; >> /* TILED for VEGA10, reserved for older ASICs */ >> #define AMDGPU_PTE_PRT (1ULL << 51) >> +/* PDE is handled as PTE for VEGA10 */ >> +#define AMDGPU_PDE_PTE (1ULL << 54) >> + >> /* VEGA10 only */ >> #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) >> #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) >> @@ -90,6 +93,7 @@ struct amdgpu_bo_list_entry; >> struct amdgpu_vm_pt { >> struct amdgpu_bo *bo; >> uint64_t addr; >> + bool huge_page; >> /* array of page tables, one for each directory entry */ >> struct amdgpu_vm_pt *entries; >