So the first patch is not a straight revert, although the title looks
like it is. I'll read the first patch more carefully.

On 2018-01-26 03:21 PM, Christian König wrote:
> The amdgpu_vm_clear_bo function takes over this functionality in the
> first patch.
>
> This patch only limits filling in the ats values in the lower half of
> the address range (previously it was filled in the whole address space).
>
> Regards,
> Christian.
>
> On 26.01.2018 at 21:18, Felix Kuehling wrote:
>> Shouldn't this change come before all the reverts? Otherwise you're
>> briefly breaking ATS support on Raven for KFD.
>>
>> Regards,
>>   Felix
>>
>>
>> On 2018-01-26 05:04 AM, Christian König wrote:
>>> At least on x86-64 the upper range is purely used by the kernel,
>>> avoid creating any ATS mappings there as a security precaution and to
>>> allow proper page fault reporting in the upper range.
>>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 83 ++++++++++++++++++++++------------
>>>  1 file changed, 54 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 14798e20abca..a3b9c3976eb3 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -267,24 +267,34 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
>>>   * Root PD needs to be reserved when calling this.
>>>   */
>>>  static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>>> -                 struct amdgpu_vm *vm,
>>> -                 struct amdgpu_bo *bo,
>>> -                 unsigned level)
>>> +                 struct amdgpu_vm *vm, struct amdgpu_bo *bo,
>>> +                 unsigned level, bool pte_support_ats)
>>>  {
>>>      struct ttm_operation_ctx ctx = { true, false };
>>>      struct dma_fence *fence = NULL;
>>> -   uint64_t addr, init_value;
>>> +   unsigned entries, ats_entries;
>>> +   uint64_t addr, ats_value;
>>>      struct amdgpu_ring *ring;
>>>      struct amdgpu_job *job;
>>> -   unsigned entries;
>>>      int r;
>>>
>>> -   if (vm->pte_support_ats) {
>>> -       init_value = AMDGPU_PTE_DEFAULT_ATC;
>>> -       if (level != AMDGPU_VM_PTB)
>>> -           init_value |= AMDGPU_PDE_PTE;
>>> +   addr = amdgpu_bo_gpu_offset(bo);
>>> +   entries = amdgpu_bo_size(bo) / 8;
>>> +
>>> +   if (pte_support_ats) {
>>> +       if (level == adev->vm_manager.root_level) {
>>> +           ats_entries = amdgpu_vm_level_shift(adev, level);
>>> +           ats_entries += AMDGPU_GPU_PAGE_SHIFT;
>>> +           ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
>>> +           ats_entries = min(ats_entries, entries);
>>> +           entries -= ats_entries;
>>> +       } else {
>>> +           ats_entries = entries;
>>> +           entries = 0;
>>> +       }
>>>      } else {
>>> -       init_value = 0;
>>> +       ats_entries = 0;
>>> +       ats_value = 0;
>>>      }
>>>
>>>      ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
>>> @@ -297,15 +307,26 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>>>      if (r)
>>>          goto error;
>>>
>>> -   addr = amdgpu_bo_gpu_offset(bo);
>>> -   entries = amdgpu_bo_size(bo) / 8;
>>> -
>>>      r = amdgpu_job_alloc_with_ib(adev, 64, &job);
>>>      if (r)
>>>          goto error;
>>>
>>> -   amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
>>> -                 entries, 0, init_value);
>>> +   if (ats_entries) {
>>> +       uint64_t ats_value;
>>> +
>>> +       ats_value = AMDGPU_PTE_DEFAULT_ATC;
>>> +       if (level != AMDGPU_VM_PTB)
>>> +           ats_value |= AMDGPU_PDE_PTE;
>>> +
>>> +       amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
>>> +                     ats_entries, 0, ats_value);
>>> +       addr += ats_entries * 8;
>>> +   }
>>> +
>>> +   if (entries)
>>> +       amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
>>> +                     entries, 0, 0);
>>> +
>>>      amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>>>
>>>      WARN_ON(job->ibs[0].length_dw > 64);
>>> @@ -339,7 +360,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>>                    struct amdgpu_vm *vm,
>>>                    struct amdgpu_vm_pt *parent,
>>>                    uint64_t saddr, uint64_t eaddr,
>>> -                 unsigned level)
>>> +                 unsigned level, bool ats)
>>>  {
>>>      unsigned shift = amdgpu_vm_level_shift(adev, level);
>>>      unsigned pt_idx, from, to;
>>> @@ -389,7 +410,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>>              if (r)
>>>                  return r;
>>>
>>> -           r = amdgpu_vm_clear_bo(adev, vm, pt, level);
>>> +           r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
>>>              if (r) {
>>>                  amdgpu_bo_unref(&pt);
>>>                  return r;
>>> @@ -421,7 +442,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>>              uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>>>                  ((1 << shift) - 1);
>>>              r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
>>> -                          sub_eaddr, level);
>>> +                          sub_eaddr, level, ats);
>>>              if (r)
>>>                  return r;
>>>          }
>>> @@ -444,26 +465,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>              struct amdgpu_vm *vm,
>>>              uint64_t saddr, uint64_t size)
>>>  {
>>> -   uint64_t last_pfn;
>>>      uint64_t eaddr;
>>> +   bool ats = false;
>>>
>>>      /* validate the parameters */
>>>      if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
>>>          return -EINVAL;
>>>
>>>      eaddr = saddr + size - 1;
>>> -   last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
>>> -   if (last_pfn >= adev->vm_manager.max_pfn) {
>>> -       dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>>> -           last_pfn, adev->vm_manager.max_pfn);
>>> -       return -EINVAL;
>>> -   }
>>> +
>>> +   if (vm->pte_support_ats)
>>> +       ats = saddr < AMDGPU_VA_HOLE_START;
>>>
>>>      saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>      eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>
>>> +   if (eaddr >= adev->vm_manager.max_pfn) {
>>> +       dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>>> +           eaddr, adev->vm_manager.max_pfn);
>>> +       return -EINVAL;
>>> +   }
>>> +
>>>      return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
>>> -                     adev->vm_manager.root_level);
>>> +                     adev->vm_manager.root_level, ats);
>>>  }
>>>
>>>  /**
>>> @@ -1665,16 +1689,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>                struct dma_fence **fence)
>>>  {
>>>      struct amdgpu_bo_va_mapping *mapping;
>>> +   uint64_t init_pte_value = 0;
>>>      struct dma_fence *f = NULL;
>>>      int r;
>>> -   uint64_t init_pte_value = 0;
>>>
>>>      while (!list_empty(&vm->freed)) {
>>>          mapping = list_first_entry(&vm->freed,
>>>              struct amdgpu_bo_va_mapping, list);
>>>          list_del(&mapping->list);
>>>
>>> -       if (vm->pte_support_ats)
>>> +       if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
>>>              init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>
>>>          r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
>>> @@ -2367,7 +2391,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>          goto error_free_root;
>>>
>>>      r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
>>> -                  adev->vm_manager.root_level);
>>> +                  adev->vm_manager.root_level,
>>> +                  vm->pte_support_ats);
>>>      if (r)
>>>          goto error_unreserve;
>>>
>
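
A quick sanity check of the root-level split in amdgpu_vm_clear_bo() above: with
the usual GFX9 numbers the arithmetic puts the ATS default value into the lower
half of the root PD and plain zeroes into the upper half. Below is a rough
standalone sketch of that calculation; the shift values, the 1 << 47 hole start
and the 512-entry root PD are assumptions for a 48-bit VA space with 9-bit
directory levels and 4 KiB GPU pages, not values taken from the driver.

/*
 * Standalone sketch of the root PD split done in amdgpu_vm_clear_bo().
 * Assumed values: the VA hole starts at 1 << 47 (AMDGPU_VA_HOLE_START),
 * the root level shift is 27 (three 9-bit levels below the root), GPU
 * pages are 4 KiB and the root PD has 512 entries. The real driver
 * derives all of these from adev->vm_manager.
 */
#include <stdint.h>
#include <stdio.h>

#define VA_HOLE_START    (1ULL << 47)  /* assumed lower/upper split point */
#define ROOT_LEVEL_SHIFT 27            /* assumed amdgpu_vm_level_shift(root) */
#define GPU_PAGE_SHIFT   12            /* 4 KiB GPU pages */

int main(void)
{
	uint64_t entries = 512;        /* assumed number of root PD entries */
	uint64_t ats_entries;

	/* One root entry covers 1 << (ROOT_LEVEL_SHIFT + GPU_PAGE_SHIFT)
	 * bytes of VA, so this is the number of root entries below the hole. */
	ats_entries = VA_HOLE_START >> (ROOT_LEVEL_SHIFT + GPU_PAGE_SHIFT);
	if (ats_entries > entries)
		ats_entries = entries;
	entries -= ats_entries;

	/* Lower half gets AMDGPU_PTE_DEFAULT_ATC, upper half gets plain
	 * zero entries, so faults in the upper range can be reported. */
	printf("root PD: %llu ATS entries, %llu zero entries\n",
	       (unsigned long long)ats_entries,
	       (unsigned long long)entries);
	return 0;
}

With those assumptions the sketch prints 256 ATS entries and 256 zero entries,
i.e. everything at or above AMDGPU_VA_HOLE_START stays zeroed, which matches
the intent stated in the commit message.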