Re: [PATCH v7 11/12] drm/amdgpu: introduce userqueue eviction fence

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 10.10.23 um 12:17 schrieb Shashank Sharma:
This patch adds support for userqueue eviction fences. In general, when
a process wants to map VRAM memory but TTM can't find enough space, it
attempts to evict BOs from its LRU list. This fence will prevent the TTM
manager from evicting the process's BOs from VRAM.

Maybe write that the other way around. The eviction fence unmaps the userq when memory management requests it.


The general idea behind this is:
- Eviction fence is initialized during the uq_mgr init and saved in
   fpriv->uq_mgr.
- This fence is attached to every userqueue object (MQD, ctx, doorbell
   and wptr) in a shared way, during the queue creation.
- The fence is signaled during the queue destruction.

Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Christian Koenig <christian.koenig@xxxxxxx>
Cc: Felix Kuehling <felix.kuehling@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
Signed-off-by: Arvind Yadav <arvind.yadav@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 82 ++++++++++++++++++-
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c        |  7 ++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    | 15 ++++
  3 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 6bae014b248e..26cdd54acd74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -21,7 +21,7 @@
   * OTHER DEALINGS IN THE SOFTWARE.
   *
   */
-
+#include <linux/atomic.h>
  #include "amdgpu.h"
  #include "amdgpu_vm.h"
  #include "amdgpu_userqueue.h"
@@ -45,6 +45,66 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
  	return idr_find(&uq_mgr->userq_idr, qid);
  }
+static const char *
+amdgpu_userqueue_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "amdgpu";
+}
+
+static const char *
+amdgpu_userqueue_fence_get_timeline_name(struct dma_fence *f)
+{
+	struct amdgpu_userq_fence *ef = container_of(f, struct amdgpu_userq_fence, base);
+
+	return ef->timeline_name;
+}
+
+static const struct dma_fence_ops amdgpu_userqueue_eviction_fence_ops = {
+	.use_64bit_seqno = true,
+	.get_driver_name = amdgpu_userqueue_fence_get_driver_name,
+	.get_timeline_name = amdgpu_userqueue_fence_get_timeline_name,
+};
+
+static void
+amdgpu_userqueue_init_eviction_fence(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_userq_fence *fence = &uq_mgr->eviction_fence;
+	atomic_t seq = ATOMIC_INIT(0);
+
+	spin_lock_init(&fence->lock);
+	fence->fence_ctx = dma_fence_context_alloc(1);

Not much of an issue, but that should be per userq_mgr.

+	fence->seq = seq;

Same here, the seq needs to be in the userq_mgr and not inside the fence itself.

+	get_task_comm(fence->timeline_name, current);
+	dma_fence_init(&fence->base, &amdgpu_userqueue_eviction_fence_ops,
+			&fence->lock, fence->fence_ctx,
+			atomic_inc_return(&fence->seq));
+}
+
+struct dma_fence *
+amdgpu_userqueue_attach_eviction_fence(struct amdgpu_userq_mgr *uq_mgr,
+				       struct amdgpu_bo *bo)
+{
+	struct dma_fence *ef = &uq_mgr->eviction_fence.base;
+	struct dma_resv *resv = bo->tbo.base.resv;
+	int ret;
+
+	ret = dma_resv_reserve_fences(resv, 1);

That should usually be done while locking the BO and not later on.

The problem is that when you have multiple BO where to add the fence to you can't risk adding it only to some of them.

+	if (ret) {
+		dma_fence_wait(ef, false);
+		return NULL;
+	}
+
+	dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_READ);

Please use DMA_RESV_USAGE_BOOKMARK for this.

+	return dma_fence_get(ef);

Why do you need to return the fence here?

+}
+
+void
+amdgpu_userqueue_signal_eviction_fence(struct dma_fence *ef)
+{
+	dma_fence_signal(ef);
+	dma_fence_put(ef);
+}
+
  int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
  				   struct amdgpu_userq_obj *userq_obj,
  				   int size)
@@ -88,6 +148,13 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
  	}
userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
+	userq_obj->ev_fence = amdgpu_userqueue_attach_eviction_fence(uq_mgr, userq_obj->obj);
+	if (!userq_obj->ev_fence) {
+		DRM_ERROR("Failed to attach eviction fence to FW object\n");
+		r = -EFAULT;
+		goto unresv;
+	}
+

We don't need to keep the ev_fence around here.

  	amdgpu_bo_unreserve(userq_obj->obj);
  	memset(userq_obj->cpu_ptr, 0, size);
  	return 0;
@@ -103,6 +170,7 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
  void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
  				   struct amdgpu_userq_obj *userq_obj)
  {
+	amdgpu_userqueue_signal_eviction_fence(userq_obj->ev_fence);
  	amdgpu_bo_kunmap(userq_obj->obj);
  	amdgpu_bo_unref(&userq_obj->obj);
  }
@@ -140,11 +208,21 @@ amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
  		goto unpin_bo;
  	}
+ db_obj->ev_fence = amdgpu_userqueue_attach_eviction_fence(uq_mgr, db_obj->obj);
+	if (!db_obj->ev_fence) {
+		DRM_ERROR("[Usermode queues] Failed to attach eviction fence with db_bo\n");
+		r = -EFAULT;
+		goto unres_bo;
+	}
+
  	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, doorbell_offset);
  	DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", index);
  	amdgpu_bo_unreserve(db_obj->obj);
  	return index;
+unres_bo:
+	amdgpu_bo_unreserve(db_obj->obj);
+
  unpin_bo:
  	amdgpu_bo_unpin(db_obj->obj);
@@ -171,6 +249,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) amdgpu_bo_unpin(queue->db_obj.obj);
  	amdgpu_bo_unref(&queue->db_obj.obj);
+	amdgpu_userqueue_signal_eviction_fence(queue->db_obj.ev_fence);
  	amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);
  	mutex_unlock(&uq_mgr->userq_mutex);
  	return 0;
@@ -278,6 +357,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi
  	idr_init_base(&userq_mgr->userq_idr, 1);
  	userq_mgr->adev = adev;
+ amdgpu_userqueue_init_eviction_fence(userq_mgr);
  	return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 33de65a0d974..e68320b4c689 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6469,6 +6469,12 @@ gfx_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
  		return ret;
  	}
+ wptr_obj->ev_fence = amdgpu_userqueue_attach_eviction_fence(uq_mgr, wptr_obj->obj);

Again you don't need to keep the ev_fence around here. The dma_resv object will keep the reference.

Looks like a start, but quite a bunch of stuff to do.

+	if (!wptr_obj->ev_fence) {
+		DRM_ERROR("Failed to attach eviction fence to wptr bo\n");
+		return -EFAULT;
+	}
+
  	queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
  	return 0;
  }
@@ -6650,6 +6656,7 @@ static void
  gfx_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
  {
  	gfx_v11_0_userq_unmap(uq_mgr, queue);
+	amdgpu_userqueue_signal_eviction_fence(queue->wptr_obj.ev_fence);
  	amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj);
  	amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd);
  }
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index a653e31350c5..f1e3d311ae18 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -33,6 +33,7 @@ struct amdgpu_userq_obj {
  	void		 *cpu_ptr;
  	uint64_t	 gpu_addr;
  	struct amdgpu_bo *obj;
+	struct dma_fence *ev_fence;
  };
struct amdgpu_usermode_queue {
@@ -57,11 +58,20 @@ struct amdgpu_userq_funcs {
  			    struct amdgpu_usermode_queue *uq);
  };
+struct amdgpu_userq_fence {
+	u64		 fence_ctx;
+	atomic_t	 seq;
+	spinlock_t	 lock;
+	struct dma_fence base;
+	char		 timeline_name[TASK_COMM_LEN];
+};
+
  /* Usermode queues for gfx */
  struct amdgpu_userq_mgr {
  	struct idr			userq_idr;
  	struct mutex			userq_mutex;
  	struct amdgpu_device		*adev;
+	struct amdgpu_userq_fence	eviction_fence;
  };
int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
@@ -76,4 +86,9 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
  				     struct amdgpu_userq_obj *userq_obj);
+
+struct dma_fence *amdgpu_userqueue_attach_eviction_fence(struct amdgpu_userq_mgr *uq_mgr,
+							 struct amdgpu_bo *bo);
+
+void amdgpu_userqueue_signal_eviction_fence(struct dma_fence *ef);
  #endif




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux