On 03/24/2017 03:27 AM, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> Also, add the fence of the clear operations to the BO to ensure that
> the underlying memory can only be re-used after all PTEs pointing to
> it have been cleared.
>
> This avoids the following sequence of events that could be triggered
> by user space:
>
> 1. Submit a CS that accesses some BO _without_ adding that BO to the
>    buffer list.
> 2. Free that BO.
> 3. Some other task re-uses the memory underlying the BO.
> 4. The CS is submitted to the hardware and accesses memory that is
>    now already in use by somebody else.
>
> By clearing the page tables immediately in step 2, a GPU VM fault will
> be triggered in step 4 instead of wild memory accesses.
>
> v2: use amdgpu_bo_fence directly
>
> Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 4a53c43..8b0f5f18 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>  	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>  	struct amdgpu_vm *vm = &fpriv->vm;
>
>  	struct amdgpu_bo_list_entry vm_pd;
>  	struct list_head list, duplicates;
>  	struct ttm_validate_buffer tv;
>  	struct ww_acquire_ctx ticket;
>  	struct amdgpu_bo_va *bo_va;
> +	struct fence *fence = NULL;
>  	int r;
>
>  	INIT_LIST_HEAD(&list);
>  	INIT_LIST_HEAD(&duplicates);
>
>  	tv.bo = &bo->tbo;
>  	tv.shared = true;
>  	list_add(&tv.head, &list);
>
>  	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
> @@ -166,20 +167,31 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>  	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>  	if (r) {
>  		dev_err(adev->dev, "leaking bo va because "
>  			"we fail to reserve bo (%d)\n", r);
>  		return;
>  	}
>  	bo_va = amdgpu_vm_bo_find(vm, bo);
>  	if (bo_va) {
>  		if (--bo_va->ref_count == 0) {
>  			amdgpu_vm_bo_rmv(adev, bo_va);
> +
> +			r = amdgpu_vm_clear_freed(adev, vm, &fence);
> +			if (unlikely(r)) {
> +				dev_err(adev->dev, "failed to clear page "
> +					"tables on GEM object close (%d)\n", r);
> +			}
> +
> +			if (fence) {

I think it's always true. Maybe you mean *fence?

> +				amdgpu_bo_fence(bo, fence, true);
> +				fence_put(fence);
> +			}
>  		}
>  	}
>  	ttm_eu_backoff_reservation(&ticket, &list);
>  }
>
>  static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
>  {
>  	if (r == -EDEADLK) {
>  		r = amdgpu_gpu_reset(adev);
>  		if (!r)
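
For what it's worth, &fence is of course never NULL, but the local
pointer starts out NULL and is presumably only set by
amdgpu_vm_clear_freed() when clearing work was actually scheduled; in
that reading the check on the local pointer is the right one, and
*fence would oops in the empty case. A minimal sketch of that
out-parameter convention (hypothetical names, not the real amdgpu API):

struct fence;

/* Hypothetical stand-in for a callee like amdgpu_vm_clear_freed():
 * it stores a fence in *out_fence only when it actually scheduled
 * work, and leaves the pointer NULL otherwise. */
static int clear_freed_sketch(struct fence **out_fence,
			      struct fence *job_fence)
{
	*out_fence = NULL;

	if (!job_fence)
		return 0;	/* success, but nothing to wait on */

	*out_fence = job_fence;	/* caller takes over the reference */
	return 0;
}

static void caller_sketch(struct fence *job_fence)
{
	struct fence *fence = NULL;

	if (clear_freed_sketch(&fence, job_fence))
		return;

	/* Under this convention the check is not always true: fence
	 * stays NULL when no work was scheduled. */
	if (fence) {
		/* e.g. amdgpu_bo_fence(bo, fence, true); fence_put(fence); */
	}
}

If the real function can indeed return success without producing a
fence (e.g. when no freed mappings were pending), the check as written
looks intentional.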