[PATCH] drm/ttm: fix use-after-free races in vm fault handling

nhaehnle@xxxxxxxxx (Nicolai Hähnle) · Sat, 18 Feb 2017 23:50:07 +0100

From: Nicolai HÃ¤hnle <nicolai.haehnle@xxxxxxx>

The vm fault handler relies on the fact that the VMA owns a reference
to the BO. However, once mmap_sem is released, other tasks are free to
destroy the VMA, which can lead to the BO being freed. Fix two code
paths where that can happen, both related to vm fault retries.

Found via a lock debugging warning which flagged &bo->wu_mutex as
locked while being destroyed.

Fixes: cbe12e74ee4e ("drm/ttm: Allow vm fault retries")
Signed-off-by: Nicolai HÃ¤hnle <nicolai.haehnle at amd.com>
--
This does not fix the random memory corruption I've been seeing.
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index a6ed9d5..750733a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -66,8 +66,11 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
 			goto out_unlock;
 
+		ttm_bo_reference(bo);
 		up_read(&vma->vm_mm->mmap_sem);
 		(void) fence_wait(bo->moving, true);
+		ttm_bo_unreserve(bo);
+		ttm_bo_unref(&bo);
 		goto out_unlock;
 	}
 
@@ -120,8 +123,10 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
 			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+				ttm_bo_reference(bo);
 				up_read(&vma->vm_mm->mmap_sem);
 				(void) ttm_bo_wait_unreserved(bo);
+				ttm_bo_unref(&bo);
 			}
 
 			return VM_FAULT_RETRY;
@@ -166,6 +171,13 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	ret = ttm_bo_vm_fault_idle(bo, vma, vmf);
 	if (unlikely(ret != 0)) {
 		retval = ret;
+
+		if (retval == VM_FAULT_RETRY &&
+		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+			/* The BO has already been unreserved. */
+			return retval;
+		}
+
 		goto out_unlock;
 	}
 
-- 
2.9.3