Re: [PATCH 1/4] drm/amdgpu: Implement page table BO fence

On 2023-06-01 15:31, Philip Yang wrote:
Add a pt_fence to the amdgpu vm structure and implement helper functions. This
fence will be shared by all page table BOs of the same amdgpu vm.

Suggested-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 45 +++++++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  4 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  1 +
  4 files changed, 52 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5af954abd5ba..09c116dfda31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1499,6 +1499,8 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
  int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
  			       enum amd_powergating_state state);
+struct dma_fence *amdgpu_pt_fence_create(void);
+
  static inline bool amdgpu_device_has_timeouts_enabled(struct amdgpu_device *adev)
  {
  	return amdgpu_gpu_recovery != 0 &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index c694b41f6461..d9bfb0af3a09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -57,6 +57,16 @@ struct amdgpu_fence {
  	ktime_t				start_timestamp;
  };
+/*
+ * Page table BO fence
+ * Fence used to ensure page table BOs are freed only after the TLB is flushed,
+ * so the hardware does not access corrupted data if a freed BO page is reused.
+ */
+struct amdgpu_pt_fence {
+	struct dma_fence base;
+	spinlock_t lock;
+};
+
  static struct kmem_cache *amdgpu_fence_slab;

  int amdgpu_fence_slab_init(void)
@@ -79,6 +89,7 @@ void amdgpu_fence_slab_fini(void)
   */
  static const struct dma_fence_ops amdgpu_fence_ops;
  static const struct dma_fence_ops amdgpu_job_fence_ops;
+static const struct dma_fence_ops amdgpu_pt_fence_ops;
  static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
  {
  	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
@@ -852,6 +863,40 @@ static const struct dma_fence_ops amdgpu_job_fence_ops = {
  	.release = amdgpu_job_fence_release,
  };
+static atomic_t pt_fence_seq = ATOMIC_INIT(0);
+
+struct dma_fence *amdgpu_pt_fence_create(void)
+{
+	struct amdgpu_pt_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	spin_lock_init(&fence->lock);
+	dma_fence_init(&fence->base, &amdgpu_pt_fence_ops, &fence->lock,
+		       dma_fence_context_alloc(1), atomic_inc_return(&pt_fence_seq));

Creating a new fence context per fence is probably wrong. I think we only need one PT fence context per GPU or per spatial partition, or maybe per VM.
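
For illustration, a rough sketch of what a shared context could look like with one context per VM; the vm->pt_fence_context and vm->pt_fence_seq fields here are hypothetical, allocated once at VM init:

	/* hypothetical: allocate the fence context once, e.g. in amdgpu_vm_init() */
	vm->pt_fence_context = dma_fence_context_alloc(1);
	atomic64_set(&vm->pt_fence_seq, 0);

	/* every PT fence created for this VM then reuses that context */
	dma_fence_init(&fence->base, &amdgpu_pt_fence_ops, &fence->lock,
		       vm->pt_fence_context,
		       atomic64_inc_return(&vm->pt_fence_seq));

That keeps all PT fences of a VM on a single timeline instead of allocating a new context for every fence.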

Regards,
  Felix


+
+	dma_fence_get(&fence->base);
+	return &fence->base;
+}
+
+static const char *amdgpu_pt_fence_get_timeline_name(struct dma_fence *f)
+{
+	return "pt_fence_timeline";
+}
+
+static void amdgpu_pt_fence_release(struct dma_fence *f)
+{
+	kfree_rcu(f, rcu);
+}
+
+static const struct dma_fence_ops amdgpu_pt_fence_ops = {
+	.get_driver_name = amdgpu_fence_get_driver_name,
+	.get_timeline_name = amdgpu_pt_fence_get_timeline_name,
+	.release = amdgpu_pt_fence_release,
+};
+
  /*
   * Fence debugfs
   */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 37b9d8a8dbec..0219398e625c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2147,6 +2147,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  		vm->update_funcs = &amdgpu_vm_sdma_funcs;

  	vm->last_update = dma_fence_get_stub();
+	vm->pt_fence = amdgpu_pt_fence_create();
  	vm->last_unlocked = dma_fence_get_stub();
  	vm->last_tlb_flush = dma_fence_get_stub();
  	vm->generation = 0;
@@ -2270,6 +2271,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  	dma_fence_put(vm->last_update);
  	vm->last_update = dma_fence_get_stub();
+	dma_fence_put(vm->pt_fence);
+	vm->pt_fence = amdgpu_pt_fence_create();
  	vm->is_compute_context = true;

  	/* Free the shadow bo for compute VM */
@@ -2358,6 +2361,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  	}

  	dma_fence_put(vm->last_update);
+	dma_fence_put(vm->pt_fence);
  	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
  		amdgpu_vmid_free_reserved(adev, vm, i);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d551fca1780e..9cc729358612 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -286,6 +286,7 @@ struct amdgpu_vm {
  	/* contains the page directory */
  	struct amdgpu_vm_bo_base     root;
  	struct dma_fence	*last_update;
+	struct dma_fence	*pt_fence;

  	/* Scheduler entities for page table updates */
  	struct drm_sched_entity	immediate;


