On 23.03.2017 19:18, Christian König wrote:
> On 23.03.2017 at 18:22, Nicolai Hähnle wrote:
>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>
>> Also, add the fence of the clear operations to the BO to ensure that
>> the underlying memory can only be re-used after all PTEs pointing to
>> it have been cleared.
>>
>> This avoids the following sequence of events that could be triggered
>> by user space:
>>
>> 1. Submit a CS that accesses some BO _without_ adding that BO to the
>>    buffer list.
>> 2. Free that BO.
>> 3. Some other task re-uses the memory underlying the BO.
>> 4. The CS is submitted to the hardware and accesses memory that is
>>    now already in use by somebody else.
>>
>> By clearing the page tables immediately in step 2, a GPU VM fault will
>> be triggered in step 4 instead of wild memory accesses.
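To make the race a bit more concrete: in userspace terms the trigger
looks roughly like this (an illustrative fragment against the
libdrm_amdgpu API; device/context/IB setup is elided and the variable
names are made up, so this is not a complete test case):

	/* 1. The IB references bo's GPU VA, but bo is deliberately left
	 *    out of the request's buffer list (ibs_request.resources),
	 *    so the kernel never learns about the dependency. */
	r = amdgpu_cs_submit(ctx, 0, &ibs_request, 1);

	/* 2. Free the BO right away; its VA mapping and backing pages
	 *    become eligible for re-use while the CS is still queued. */
	r = amdgpu_bo_free(bo_handle);

	/* 3./4. happen asynchronously: another task re-uses the pages
	 *    and the CS scribbles over them once it reaches the
	 *    hardware. With the PTEs cleared immediately at step 2, the
	 *    access turns into a harmless GPU VM fault instead. */
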
> First of all please split adding the fence parameter to
> amdgpu_vm_clear_freed() into a separate patch.

Sure, I'll do that.

> Not a must have, but that should make it easier to review.
>
>> Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 13 +++++++++++--
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 20 ++++++++++++++------
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
>>  4 files changed, 28 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 55d553a..85e6070 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -778,21 +778,21 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>>  	int i, r;
>>
>>  	r = amdgpu_vm_update_page_directory(adev, vm);
>>  	if (r)
>>  		return r;
>>
>>  	r = amdgpu_sync_fence(adev, &p->job->sync,
>>  			      vm->page_directory_fence);
>>  	if (r)
>>  		return r;
>>
>> -	r = amdgpu_vm_clear_freed(adev, vm);
>> +	r = amdgpu_vm_clear_freed(adev, vm, NULL);
>>  	if (r)
>>  		return r;
>>
>>  	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
>>  	if (r)
>>  		return r;
>>
>>  	r = amdgpu_sync_fence(adev, &p->job->sync,
>>  			      fpriv->prt_va->last_pt_update);
>>  	if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index be9fb2c..bd2daef 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>  	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>>  	struct amdgpu_vm *vm = &fpriv->vm;
>>
>>  	struct amdgpu_bo_list_entry vm_pd;
>>  	struct list_head list, duplicates;
>>  	struct ttm_validate_buffer tv;
>>  	struct ww_acquire_ctx ticket;
>>  	struct amdgpu_bo_va *bo_va;
>> +	struct fence *fence = NULL;
>>  	int r;
>>
>>  	INIT_LIST_HEAD(&list);
>>  	INIT_LIST_HEAD(&duplicates);
>>
>>  	tv.bo = &bo->tbo;
>>  	tv.shared = true;
>>  	list_add(&tv.head, &list);
>>
>>  	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>> @@ -166,23 +167,31 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>  	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>  	if (r) {
>>  		dev_err(adev->dev, "leaking bo va because "
>>  			"we fail to reserve bo (%d)\n", r);
>>  		return;
>>  	}
>>  	bo_va = amdgpu_vm_bo_find(vm, bo);
>>  	if (bo_va) {
>>  		if (--bo_va->ref_count == 0) {
>>  			amdgpu_vm_bo_rmv(adev, bo_va);
>> +
>> +			amdgpu_vm_clear_freed(adev, vm, &fence);
>>  		}
>>  	}
>> -	ttm_eu_backoff_reservation(&ticket, &list);
>> +
>> +	if (fence) {
>> +		ttm_eu_fence_buffer_objects(&ticket, &list, fence);
>> +		fence_put(fence);
>> +	} else {
>> +		ttm_eu_backoff_reservation(&ticket, &list);
>> +	}

> Ah, now I remember why we didn't do that before. We could run into
> problems because amdgpu_gem_object_close() can't fail and adding this
> needs memory.
>
> Anyway, "tv.shared = true;" was there before. So your patch doesn't make
> it any worse.
>
> But I suggest to use amdgpu_bo_fence() instead of
> ttm_eu_fence_buffer_objects(). This way we don't try to add the fence to
> the page directory.

Just checked, the fence is added to the page directory anyway, in
amdgpu_vm_bo_update_mapping. I think that's necessary to make sure
subsequent CS submissions see the cleared page tables.

Anyway, it still makes sense to remove the call to
ttm_eu_fence_buffer_objects here. That's more explicit about who is
responsible for adding fences to what.
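Concretely, the tail of amdgpu_gem_object_close() in v2 would then look
something like this (untested sketch; shared == true to match the
tv.shared setting used during reservation above):

	if (fence) {
		/* Attach the clear fence to the BO's reservation object,
		 * so TTM won't re-use the backing memory before the PTEs
		 * pointing at it are actually cleared. */
		amdgpu_bo_fence(bo, fence, true);
		fence_put(fence);
	}
	ttm_eu_backoff_reservation(&ticket, &list);
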
Thanks,
Nicolai

> Apart from that the patch looks good to me,
> Christian.
>
>>  }
>>
>>  static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
>>  {
>>  	if (r == -EDEADLK) {
>>  		r = amdgpu_gpu_reset(adev);
>>  		if (!r)
>>  			r = -EAGAIN;
>>  	}
>>  	return r;
>>  }
>> @@ -535,21 +544,21 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>  	r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,
>>  				      NULL);
>>  	if (r)
>>  		goto error;
>>
>>  	r = amdgpu_vm_update_page_directory(adev, vm);
>>  	if (r)
>>  		goto error;
>>
>> -	r = amdgpu_vm_clear_freed(adev, vm);
>> +	r = amdgpu_vm_clear_freed(adev, vm, NULL);
>>  	if (r)
>>  		goto error;
>>
>>  	if (operation == AMDGPU_VA_OP_MAP ||
>>  	    operation == AMDGPU_VA_OP_REPLACE)
>>  		r = amdgpu_vm_bo_update(adev, bo_va, false);
>>
>>  error:
>>  	if (r && r != -ERESTARTSYS)
>>  		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index dd7df45..b6029ed 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1397,48 +1397,56 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>  	}
>>  	kfree(shared);
>>  }
>>
>>  /**
>>   * amdgpu_vm_clear_freed - clear freed BOs in the PT
>>   *
>>   * @adev: amdgpu_device pointer
>>   * @vm: requested vm
>> + * @fence: optional resulting fence
>>   *
>>   * Make sure all freed BOs are cleared in the PT.
>>   * Returns 0 for success.
>>   *
>>   * PTs have to be reserved and mutex must be locked!
>>   */
>>  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>> -			  struct amdgpu_vm *vm)
>> +			  struct amdgpu_vm *vm,
>> +			  struct fence **fence)
>>  {
>>  	struct amdgpu_bo_va_mapping *mapping;
>> -	struct fence *fence = NULL;
>> +	struct fence *f = NULL;
>>  	int r;
>>
>>  	while (!list_empty(&vm->freed)) {
>>  		mapping = list_first_entry(&vm->freed,
>>  			struct amdgpu_bo_va_mapping, list);
>>  		list_del(&mapping->list);
>>
>>  		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
>> -					       0, 0, &fence);
>> -		amdgpu_vm_free_mapping(adev, vm, mapping, fence);
>> +					       0, 0, &f);
>> +		amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>  		if (r) {
>> -			fence_put(fence);
>> +			fence_put(f);
>>  			return r;
>>  		}
>> +	}
>> +
>> +	if (fence && f) {
>> +		fence_put(*fence);
>> +		*fence = f;
>> +	} else {
>> +		fence_put(f);
>>  	}
>> -	fence_put(fence);
>> +
>>  	return 0;
>>  }
>>
>>  /**
>>   * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
>>   *
>>   * @adev: amdgpu_device pointer
>>   * @vm: requested vm
>>   *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index ff10fa5..9d5a572 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -187,21 +187,22 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>  			struct amdgpu_vm *vm,
>>  			uint64_t saddr, uint64_t size);
>>  int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>>  		      struct amdgpu_sync *sync, struct fence *fence,
>>  		      struct amdgpu_job *job);
>>  int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
>>  void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
>>  int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
>>  				    struct amdgpu_vm *vm);
>>  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>> -			  struct amdgpu_vm *vm);
>> +			  struct amdgpu_vm *vm,
>> +			  struct fence **fence);
>>  int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>  			     struct amdgpu_sync *sync);
>>  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>  			struct amdgpu_bo_va *bo_va,
>>  			bool clear);
>>  void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>  			     struct amdgpu_bo *bo);
>>  struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>>  				       struct amdgpu_bo *bo);
>>  struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>
>
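P.S.: To spell out the ownership rule of the new out-parameter as
implemented above: if any mapping was actually cleared, the previous
*fence reference is dropped with fence_put() and replaced by the last
clear fence; otherwise *fence is left untouched. Either way the caller
owns whatever ends up in *fence, so a minimal caller looks like:

	struct fence *fence = NULL;
	int r;

	r = amdgpu_vm_clear_freed(adev, vm, &fence);
	if (r)
		return r;

	if (fence) {
		/* e.g. attach it to a reservation object or sync on it,
		 * then drop the reference we now own. */
		fence_put(fence);
	}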