[PATCH 4/4] drm/amdgpu: fill only the lower range with ATS entries

ckoenig.leichtzumerken@xxxxxxxxx (Christian König) · Fri, 26 Jan 2018 21:26:18 +0100

Yeah, good point. I should note that more clearly in the first patch.

Christian.

Am 26.01.2018 um 21:24 schrieb Felix Kuehling:
> So the first patch is not a straight revert, although the title looks
> like it is. I'll read the first patch more carefully.
>
>
> On 2018-01-26 03:21 PM, Christian König wrote:
>> The amdgpu_vm_clear_bo function takes over this functionality in the
>> first patch.
>>
>> This patch only limits filling in the ATS values to the lower half of
>> the address range (previously they were filled in across the whole address space).
>>
>> Regards,
>> Christian.
>>
>> Am 26.01.2018 um 21:18 schrieb Felix Kuehling:
>>> Shouldn't this change come before all the reverts? Otherwise you're
>>> briefly breaking ATS support on Raven for KFD.
>>>
>>> Regards,
>>>     Felix
>>>
>>>
>>> On 2018-01-26 05:04 AM, Christian König wrote:
>>>> At least on x86-64 the upper range is used purely by the kernel, so
>>>> avoid creating any ATS mappings there as a security precaution and to
>>>> allow proper page fault reporting in the upper range.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>> ---
>>>>  Â  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 83
>>>> ++++++++++++++++++++++------------
>>>>  Â  1 file changed, 54 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 14798e20abca..a3b9c3976eb3 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -267,24 +267,34 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
>>>>  Â Â  * Root PD needs to be reserved when calling this.
>>>>  Â Â  */
>>>>  Â  static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  struct amdgpu_vm *vm,
>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  struct amdgpu_bo *bo,
>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  unsigned level)
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  struct amdgpu_vm *vm, struct amdgpu_bo *bo,
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  unsigned level, bool pte_support_ats)
>>>>  Â  {
>>>>  Â Â Â Â Â  struct ttm_operation_ctx ctx = { true, false };
>>>>  Â Â Â Â Â  struct dma_fence *fence = NULL;
>>>> -Â Â Â  uint64_t addr, init_value;
>>>> +Â Â Â  unsigned entries, ats_entries;
>>>> +Â Â Â  uint64_t addr, ats_value;
>>>>  Â Â Â Â Â  struct amdgpu_ring *ring;
>>>>  Â Â Â Â Â  struct amdgpu_job *job;
>>>> -Â Â Â  unsigned entries;
>>>>  Â Â Â Â Â  int r;
>>>>  Â  -Â Â Â  if (vm->pte_support_ats) {
>>>> -Â Â Â Â Â Â Â  init_value = AMDGPU_PTE_DEFAULT_ATC;
>>>> -Â Â Â Â Â Â Â  if (level != AMDGPU_VM_PTB)
>>>> -Â Â Â Â Â Â Â Â Â Â Â  init_value |= AMDGPU_PDE_PTE;
>>>> +Â Â Â  addr = amdgpu_bo_gpu_offset(bo);
>>>> +Â Â Â  entries = amdgpu_bo_size(bo) / 8;
>>>> +
>>>> +Â Â Â  if (pte_support_ats) {
>>>> +Â Â Â Â Â Â Â  if (level == adev->vm_manager.root_level) {
>>>> +Â Â Â Â Â Â Â Â Â Â Â  ats_entries = amdgpu_vm_level_shift(adev, level);
>>>> +Â Â Â Â Â Â Â Â Â Â Â  ats_entries += AMDGPU_GPU_PAGE_SHIFT;
>>>> +Â Â Â Â Â Â Â Â Â Â Â  ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
>>>> +Â Â Â Â Â Â Â Â Â Â Â  ats_entries = min(ats_entries, entries);
>>>> +Â Â Â Â Â Â Â Â Â Â Â  entries -= ats_entries;
>>>> +Â Â Â Â Â Â Â  } else {
>>>> +Â Â Â Â Â Â Â Â Â Â Â  ats_entries = entries;
>>>> +Â Â Â Â Â Â Â Â Â Â Â  entries = 0;
>>>> +Â Â Â Â Â Â Â  }
>>>>  Â Â Â Â Â  } else {
>>>> -Â Â Â Â Â Â Â  init_value = 0;
>>>> +Â Â Â Â Â Â Â  ats_entries = 0;
>>>> +Â Â Â Â Â Â Â  ats_value = 0;
>>>>  Â Â Â Â Â  }
>>>>  Â  Â Â Â Â Â  ring = container_of(vm->entity.sched, struct amdgpu_ring,
>>>> sched);
>>>> @@ -297,15 +307,26 @@ static int amdgpu_vm_clear_bo(struct
>>>> amdgpu_device *adev,
>>>>  Â Â Â Â Â  if (r)
>>>>  Â Â Â Â Â Â Â Â Â  goto error;
>>>>  Â  -Â Â Â  addr = amdgpu_bo_gpu_offset(bo);
>>>> -Â Â Â  entries = amdgpu_bo_size(bo) / 8;
>>>> -
>>>>  Â Â Â Â Â  r = amdgpu_job_alloc_with_ib(adev, 64, &job);
>>>>  Â Â Â Â Â  if (r)
>>>>  Â Â Â Â Â Â Â Â Â  goto error;
>>>>  Â  -Â Â Â  amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  entries, 0, init_value);
>>>> +Â Â Â  if (ats_entries) {
>>>> +Â Â Â Â Â Â Â  uint64_t ats_value;
>>>> +
>>>> +Â Â Â Â Â Â Â  ats_value = AMDGPU_PTE_DEFAULT_ATC;
>>>> +Â Â Â Â Â Â Â  if (level != AMDGPU_VM_PTB)
>>>> +Â Â Â Â Â Â Â Â Â Â Â  ats_value |= AMDGPU_PDE_PTE;
>>>> +
>>>> +Â Â Â Â Â Â Â  amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ats_entries, 0, ats_value);
>>>> +Â Â Â Â Â Â Â  addr += ats_entries * 8;
>>>> +Â Â Â  }
>>>> +
>>>> +Â Â Â  if (entries)
>>>> +Â Â Â Â Â Â Â  amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  entries, 0, 0);
>>>> +
>>>>  Â Â Â Â Â  amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>>>>  Â  Â Â Â Â Â  WARN_ON(job->ibs[0].length_dw > 64);
>>>> @@ -339,7 +360,7 @@ static int amdgpu_vm_alloc_levels(struct
>>>> amdgpu_device *adev,
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  struct amdgpu_vm *vm,
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  struct amdgpu_vm_pt *parent,
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  uint64_t saddr, uint64_t eaddr,
>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  unsigned level)
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  unsigned level, bool ats)
>>>>  Â  {
>>>>  Â Â Â Â Â  unsigned shift = amdgpu_vm_level_shift(adev, level);
>>>>  Â Â Â Â Â  unsigned pt_idx, from, to;
>>>> @@ -389,7 +410,7 @@ static int amdgpu_vm_alloc_levels(struct
>>>> amdgpu_device *adev,
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  if (r)
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return r;
>>>>  Â  -Â Â Â Â Â Â Â Â Â Â Â  r = amdgpu_vm_clear_bo(adev, vm, pt, level);
>>>> +Â Â Â Â Â Â Â Â Â Â Â  r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  if (r) {
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  amdgpu_bo_unref(&pt);
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return r;
>>>> @@ -421,7 +442,7 @@ static int amdgpu_vm_alloc_levels(struct
>>>> amdgpu_device *adev,
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ((1 << shift) - 1);
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  sub_eaddr, level);
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  sub_eaddr, level, ats);
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  if (r)
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return r;
>>>>  Â Â Â Â Â Â Â Â Â  }
>>>> @@ -444,26 +465,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device
>>>> *adev,
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  struct amdgpu_vm *vm,
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  uint64_t saddr, uint64_t size)
>>>>  Â  {
>>>> -Â Â Â  uint64_t last_pfn;
>>>>  Â Â Â Â Â  uint64_t eaddr;
>>>> +Â Â Â  bool ats = false;
>>>>  Â  Â Â Â Â Â  /* validate the parameters */
>>>>  Â Â Â Â Â  if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
>>>>  Â Â Â Â Â Â Â Â Â  return -EINVAL;
>>>>  Â  Â Â Â Â Â  eaddr = saddr + size - 1;
>>>> -Â Â Â  last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
>>>> -Â Â Â  if (last_pfn >= adev->vm_manager.max_pfn) {
>>>> -Â Â Â Â Â Â Â  dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>>>> -Â Â Â Â Â Â Â Â Â Â Â  last_pfn, adev->vm_manager.max_pfn);
>>>> -Â Â Â Â Â Â Â  return -EINVAL;
>>>> -Â Â Â  }
>>>> +
>>>> +Â Â Â  if (vm->pte_support_ats)
>>>> +Â Â Â Â Â Â Â  ats = saddr < AMDGPU_VA_HOLE_START;
>>>>  Â  Â Â Â Â Â  saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>  Â Â Â Â Â  eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>  Â  +Â Â Â  if (eaddr >= adev->vm_manager.max_pfn) {
>>>> +Â Â Â Â Â Â Â  dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>>>> +Â Â Â Â Â Â Â Â Â Â Â  eaddr, adev->vm_manager.max_pfn);
>>>> +Â Â Â Â Â Â Â  return -EINVAL;
>>>> +Â Â Â  }
>>>> +
>>>>  Â Â Â Â Â  return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  adev->vm_manager.root_level);
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  adev->vm_manager.root_level, ats);
>>>>  Â  }
>>>>  Â  Â  /**
>>>> @@ -1665,16 +1689,16 @@ int amdgpu_vm_clear_freed(struct
>>>> amdgpu_device *adev,
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  struct dma_fence **fence)
>>>>  Â  {
>>>>  Â Â Â Â Â  struct amdgpu_bo_va_mapping *mapping;
>>>> +Â Â Â  uint64_t init_pte_value = 0;
>>>>  Â Â Â Â Â  struct dma_fence *f = NULL;
>>>>  Â Â Â Â Â  int r;
>>>> -Â Â Â  uint64_t init_pte_value = 0;
>>>>  Â  Â Â Â Â Â  while (!list_empty(&vm->freed)) {
>>>>  Â Â Â Â Â Â Â Â Â  mapping = list_first_entry(&vm->freed,
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  struct amdgpu_bo_va_mapping, list);
>>>>  Â Â Â Â Â Â Â Â Â  list_del(&mapping->list);
>>>>  Â  -Â Â Â Â Â Â Â  if (vm->pte_support_ats)
>>>> +Â Â Â Â Â Â Â  if (vm->pte_support_ats && mapping->start <
>>>> AMDGPU_VA_HOLE_START)
>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>>  Â  Â Â Â Â Â Â Â Â Â  r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
>>>> @@ -2367,7 +2391,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>>>> struct amdgpu_vm *vm,
>>>>  Â Â Â Â Â Â Â Â Â  goto error_free_root;
>>>>  Â  Â Â Â Â Â  r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  adev->vm_manager.root_level);
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  adev->vm_manager.root_level,
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  vm->pte_support_ats);
>>>>  Â Â Â Â Â  if (r)
>>>>  Â Â Â Â Â Â Â Â Â  goto error_unreserve;
>>>>    
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx