Re: [PATCH] drm/ttm: fix busy memory to fail other user

On 24.04.19 at 13:40, Chunming Zhou wrote:
This basically picks up Christian's idea:

1. Reserve the BO in DC using a ww_mutex ticket (trivial).

Please split that step into a separate patch.

2. If we then run into this EBUSY condition in TTM check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
3. If we have a ticket we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
4. If getting the reservation lock with the ticket succeeded we check if the BO is still the first one on the LRU in question (the BO could have moved).
5. If the BO is still the first one on the LRU in question we try to evict it as we would evict any other BO.
6. If any of the "If's" above fail we just back off and return -EBUSY.
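
For illustration only (not part of the patch below), the TTM side of steps 2-6 in ttm_mem_evict_first() could look roughly like the following sketch, with "first_busy_bo" as a hypothetical local remembered during the LRU walk:

	/* Sketch: fallback when the LRU walk found nothing evictable. */
	if (!bo) {
		/* steps 2/6: without a busy BO or a ticket just give up */
		if (!first_busy_bo || !ctx->ticket) {
			spin_unlock(&glob->lru_lock);
			return -EBUSY;
		}

		/* step 3: keep the BO alive, drop the LRU lock and take its
		 * reservation with our ticket (blocking until it is free) */
		kref_get(&first_busy_bo->list_kref);
		spin_unlock(&glob->lru_lock);
		ret = ww_mutex_lock_interruptible(&first_busy_bo->resv->lock,
						  ctx->ticket);

		/* steps 4-6: on success re-check that the BO is still first
		 * on that LRU and evict it as usual; on any failure
		 * (including -EDEADLK) drop the reference and return -EBUSY */
	}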

Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
Signed-off-by: Chunming Zhou <david1.zhou@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 14 ++++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h    |  3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  2 +-
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 +++++--
  drivers/gpu/drm/ttm/ttm_bo.c                  | 58 +++++++++++++++++--
  include/drm/ttm/ttm_bo_api.h                  |  1 +
  7 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index affde72b44db..e08ff2244070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -791,6 +791,7 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)
   * @domain: domain to be pinned to
   * @min_offset: the start of requested address range
   * @max_offset: the end of requested address range
+ * @ticket: struct ww_acquire_ctx ticket, if the pin BO has a ticket assigned
   *
   * Pins the buffer object according to requested domain and address range. If
   * the memory is unbound gart memory, binds the pages into gart table. Adjusts
@@ -808,10 +809,17 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)
   * 0 for success or a negative error code on failure.
   */
  int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
-			     u64 min_offset, u64 max_offset)
+			     u64 min_offset, u64 max_offset,
+			     struct ww_acquire_ctx *ticket)
  {
  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	struct ttm_operation_ctx ctx = { false, false };
+	struct ttm_operation_ctx ctx = {
+                .interruptible = false,
+                .no_wait_gpu = false,
+                .resv = bo->tbo.resv,
+		.ticket = ticket,

We can actually get the ticket from the BO using bo->resv->lock.ctx. That should make the whole patch less work.
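
E.g. wherever the BO being validated is at hand (say in ttm_bo_mem_space(), sketch only):

	/* acquire context this BO is currently reserved with,
	 * NULL if it was locked without a ticket */
	struct ww_acquire_ctx *ticket = bo->resv->lock.ctx;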

+                .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
+        };
  	int r, i;

  	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
@@ -907,7 +915,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
   */
  int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
  {
-	return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
+	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, NULL);
  }

  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index c430e8259038..41e7fff7f3f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -247,7 +247,8 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
  void amdgpu_bo_unref(struct amdgpu_bo **bo);
  int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
  int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
-			     u64 min_offset, u64 max_offset);
+			     u64 min_offset, u64 max_offset,
+			     struct ww_acquire_ctx *ticket);
  int amdgpu_bo_unpin(struct amdgpu_bo *bo);
  int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
  int amdgpu_bo_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 22bd21efe6b1..dd31ce1a09e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1150,7 +1150,7 @@ static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
  	r = amdgpu_bo_pin_restricted(bo,
  			AMDGPU_GEM_DOMAIN_VRAM,
  			offset,
-			offset + size);
+			offset + size, NULL);
  	if (r)
  		goto error_pin;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index afccca5b1f5f..e63af0debc7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1661,7 +1661,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
  			AMDGPU_GEM_DOMAIN_VRAM,
  			adev->fw_vram_usage.start_offset,
  			(adev->fw_vram_usage.start_offset +
-			adev->fw_vram_usage.size));
+			adev->fw_vram_usage.size), NULL);
  		if (r)
  			goto error_pin;
  		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a5cacf846e1b..bdbbc3891585 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4101,6 +4101,9 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
  	struct amdgpu_device *adev;
  	struct amdgpu_bo *rbo;
  	struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
+	struct list_head list, duplicates;
+	struct ttm_validate_buffer tv;
+	struct ww_acquire_ctx ticket;
  	uint64_t tiling_flags;
  	uint32_t domain;
  	int r;
@@ -4119,32 +4122,43 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
  	adev = amdgpu_ttm_adev(rbo->tbo.bdev);
  	r = amdgpu_bo_reserve(rbo, false);
  	if (unlikely(r != 0))
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+
+	tv.bo = &rbo->tbo;
+	tv.num_shared = 1;
+	list_add(&tv.head, &list);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+	if (r) {
+		dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
  		return r;
+	}

  	if (plane->type != DRM_PLANE_TYPE_CURSOR)
  		domain = amdgpu_display_supported_domains(adev);
  	else
  		domain = AMDGPU_GEM_DOMAIN_VRAM;

-	r = amdgpu_bo_pin(rbo, domain);
+	r = amdgpu_bo_pin_restricted(rbo, domain, 0, 0, &ticket);
  	if (unlikely(r != 0)) {
  		if (r != -ERESTARTSYS)
  			DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
-		amdgpu_bo_unreserve(rbo);
+		ttm_eu_backoff_reservation(&ticket, &list);
  		return r;
  	}

  	r = amdgpu_ttm_alloc_gart(&rbo->tbo);
  	if (unlikely(r != 0)) {
  		amdgpu_bo_unpin(rbo);
-		amdgpu_bo_unreserve(rbo);
+		ttm_eu_backoff_reservation(&ticket, &list);
  		DRM_ERROR("%p bind failed\n", rbo);
  		return r;
  	}

  	amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);

-	amdgpu_bo_unreserve(rbo);
+	ttm_eu_backoff_reservation(&ticket, &list);

  	afb->address = amdgpu_bo_gpu_offset(rbo);

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 8502b3ed2d88..112fea8f21f9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -766,11 +766,12 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
   * b. Otherwise, trylock it.
   */
  static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-			struct ttm_operation_ctx *ctx, bool *locked)
+			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
  {
  	bool ret = false;

  	*locked = false;
+	*busy = false;
  	if (bo->resv == ctx->resv) {
  		reservation_object_assert_held(bo->resv);
  		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
@@ -779,6 +780,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
  	} else {
  		*locked = reservation_object_trylock(bo->resv);
  		ret = *locked;
+		if (ret)
+			*busy = true;
  	}

  	return ret;
@@ -791,16 +794,20 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
  {
  	struct ttm_bo_global *glob = bdev->glob;
  	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
-	struct ttm_buffer_object *bo = NULL;
-	bool locked = false;
+	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
+	bool locked = false, list_busy = false;
  	unsigned i;
  	int ret;

  	spin_lock(&glob->lru_lock);
  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
  		list_for_each_entry(bo, &man->lru[i], lru) {
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+			bool busy = false;
+			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
+							    &busy)) {
+				list_busy |= busy;
  				continue;
+			}

  			if (place && !bdev->driver->eviction_valuable(bo,
  								      place)) {
@@ -819,8 +826,44 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
  	}

  	if (!bo) {
+		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+			if (list_empty(&man->lru[i]))
+				continue;
+			bo = list_first_entry(&man->lru[i],
+					      struct ttm_buffer_object,
+					      lru);
+
+			break;
+		}

Maybe keep the first busy BO from the list walk above instead of the list_busy flag.
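
E.g. roughly (sketch only, with a hypothetical "busy_bo" local instead of list_busy):

			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
							    &busy)) {
				if (busy && !busy_bo)
					busy_bo = bo;
				continue;
			}

Then the extra loop below that grabs the first BO on the LRU isn't needed at all.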

  		spin_unlock(&glob->lru_lock);
-		return -EBUSY;
+		if (!list_busy || !ctx->ticket)
+			return -EBUSY;
+		if (ctx->interruptible) {
+			ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+							       ctx->ticket);
+		} else {
+			ww_mutex_lock_slow(&bo->resv->lock, ctx->ticket);

You MUST use the ww_mutex_lock and ww_mutex_lock_interruptible functions here and NOT the slow path versions.

AND then check the return code for a deadlock.
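
Roughly what that would look like (untested sketch):

		if (ctx->interruptible)
			ret = ww_mutex_lock_interruptible(&bo->resv->lock,
							  ctx->ticket);
		else
			ret = ww_mutex_lock(&bo->resv->lock, ctx->ticket);

		/* a -EDEADLK here means our ticket has to back off, i.e. the
		 * caller must drop all its locks and retry */
		if (ret)
			return ret;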

Christian.

+			ret = 0;
+		}
+		if (ret)
+			return ret;
+		spin_lock(&glob->lru_lock);
+		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+			if (list_empty(&man->lru[i]))
+				continue;
+			first_bo = list_first_entry(&man->lru[i],
+						    struct ttm_buffer_object,
+						    lru);
+
+			break;
+		}
+		spin_unlock(&glob->lru_lock);
+		/* verify that the BO has not been moved in the meantime */
+		if (first_bo != bo) {
+			ww_mutex_unlock(&bo->resv->lock);
+			return -EBUSY;
+		}
+		/* ok, we hold the first busy BO now, evict it below */
  	}

  	kref_get(&bo->list_kref);
@@ -1784,7 +1827,10 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
  	spin_lock(&glob->lru_lock);
  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
  		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
+			bool busy = false;
+
+			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
+							   &busy)) {
  				ret = 0;
  				break;
  			}
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 49d9cdfc58f2..5bb879f117e3 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -276,6 +276,7 @@ struct ttm_operation_ctx {
  	bool interruptible;
  	bool no_wait_gpu;
  	struct reservation_object *resv;
+	struct ww_acquire_ctx *ticket;
  	uint64_t bytes_moved;
  	uint32_t flags;
  };

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel



